diff --git a/.gitignore b/.gitignore index eb5a316c..77746140 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ target +/config.toml +.gitlab-ci-local/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 00000000..f3efafbe --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,30 @@ +image: "redoxos/redoxer:latest" + +variables: + GIT_SUBMODULE_STRATEGY: recursive + +stages: + - host + - build + - test + # TODO: benchmarks and profiling (maybe manually enabled for relevant MRs)? + +build: + stage: build + script: + - mkdir -p target/${ARCH} + - TARGET=${ARCH}-unknown-redox redoxer env make BUILD=target/${ARCH} + parallel: + matrix: + - ARCH: [x86_64, i686, aarch64, riscv64gc] + +fmt: + stage: host + script: + - rustup component add rustfmt-preview + - cargo fmt -- --check + +unit_test: + stage: test + script: + - TARGET=x86_64-unknown-redox redoxer test diff --git a/.gitmodules b/.gitmodules index 94e58ada..0a0f6ac8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,11 @@ -[submodule "syscall"] - path = syscall - url = https://gitlab.redox-os.org/redox-os/syscall.git [submodule "slab_allocator"] path = slab_allocator url = https://gitlab.redox-os.org/redox-os/slab_allocator +[submodule "rmm"] + path = rmm + url = https://gitlab.redox-os.org/redox-os/rmm.git + branch = master +[submodule "redox-path"] + path = redox-path + url = https://gitlab.redox-os.org/redox-os/redox-path.git + branch = main diff --git a/.helix/config.toml b/.helix/config.toml new file mode 100644 index 00000000..a1ec3e0a --- /dev/null +++ b/.helix/config.toml @@ -0,0 +1,2 @@ +[editor] +auto-format = false diff --git a/.helix/languages.toml b/.helix/languages.toml new file mode 100644 index 00000000..c86c7b8a --- /dev/null +++ b/.helix/languages.toml @@ -0,0 +1,13 @@ +[[language]] +name = "rust" + +[[language-server.rust-analyzer.config.cargo]] +extraEnv = ["RUST_TARGET_PATH=targets"] +# Select one of the targets to make the LSP work for your configuration +# Do not commit this change +# TODO: find a better way to do this +# target = "aarch64-unknown-kernel" + +[[language-server.rust-analyzer.config.check]] +targets = ["x86_64-unknown-kernel", "i686-unknown-kernel", "aarch64-unknown-kernel"] + diff --git a/Cargo.lock b/Cargo.lock index db7f9e16..be23923f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,552 +1,502 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
+version = 4 + [[package]] -name = "aho-corasick" -version = "0.7.3" +name = "ahash" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ - "memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "once_cell", + "version_check", + "zerocopy", ] [[package]] -name = "ansi_term" -version = "0.11.0" +name = "arrayvec" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "autocfg" -version = "0.1.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] -name = "backtrace" -version = "0.3.15" +name = "bit_field" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "backtrace-sys 0.1.28 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc-demangle 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61" [[package]] -name = "backtrace-sys" -version = "0.1.28" +name = "bitfield" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cc 1.0.35 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "46afbd2983a5d5a7bd740ccb198caf5b82f45c40c09c0eed36052d91cb92e719" [[package]] name = "bitflags" -version = "0.7.0" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "0.9.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" [[package]] -name = "bitflags" -version = "1.0.4" +name = "byteorder" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] -name = "cargo_metadata" -version = "0.5.8" +name = "cc" +version = "1.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8691782945451c1c383942c4874dbe63814f61cb57ef773cda2972682b7bb3c0" dependencies = [ - "error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)", + "shlex", ] -[[package]] -name = "cc" -version = "1.0.35" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "cfg-if" -version = "0.1.7" -source = 
"registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "clippy" -version = "0.0.209" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "clippy_lints 0.0.209 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "clippy_lints" -version = "0.0.209" +name = "equivalent" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cargo_metadata 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)", - "if_chain 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "itertools 0.7.11 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "pulldown-cmark 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "quine-mc_cluskey 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", - "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)", - "toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] -name = "either" -version = "1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" +name = "fdt" +version = "0.2.0-alpha1" +source = "git+https://github.com/repnop/fdt.git?rev=2fb1409edd1877c714a0aa36b6a7c5351004be54#2fb1409edd1877c714a0aa36b6a7c5351004be54" [[package]] -name = "error-chain" -version = "0.11.0" +name = "goblin" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d20fd25aa456527ce4f544271ae4fea65d2eda4a6561ea56f39fb3ee4f7e3884" dependencies = [ - "backtrace 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", + "plain", + "scroll", ] [[package]] -name = "getopts" -version = "0.2.18" +name = "hashbrown" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ - "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "ahash", ] [[package]] -name = "goblin" -version = "0.0.21" +name = "hashbrown" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "plain 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "scroll 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" [[package]] -name = "idna" -version = "0.1.5" +name = "indexmap" +version = "2.9.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "equivalent", + "hashbrown 0.15.3", ] [[package]] -name = "if_chain" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" +name = "kernel" +version = "0.5.12" +dependencies = [ + "arrayvec", + "bitfield", + "bitflags 2.9.0", + "byteorder", + "cc", + "fdt", + "goblin", + "hashbrown 0.14.5", + "indexmap", + "linked_list_allocator 0.9.1", + "log", + "raw-cpuid", + "redox-path", + "redox_syscall", + "rmm", + "rustc-cfg", + "rustc-demangle", + "sbi-rt", + "slab", + "slab_allocator", + "spin 0.9.8", + "spinning_top 0.3.0", + "toml", + "x86", +] [[package]] -name = "itertools" -version = "0.7.11" +name = "linked_list_allocator" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47de1a43fad0250ee197e9e124e5b5deab3d7b39d4428ae8a6d741ceb340c362" dependencies = [ - "either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)", + "spin 0.5.2", ] [[package]] -name = "itoa" -version = "0.4.3" +name = "linked_list_allocator" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "kernel" -version = "0.1.54" +checksum = "549ce1740e46b291953c4340adcd74c59bcf4308f4cac050fd33ba91b7168f4a" dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "clippy 0.0.209 (registry+https://github.com/rust-lang/crates.io-index)", - "goblin 0.0.21 (registry+https://github.com/rust-lang/crates.io-index)", - "linked_list_allocator 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "raw-cpuid 4.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.56", - "rustc-demangle 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", - "slab_allocator 0.3.1", - "spin 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", - "x86 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "spinning_top 0.2.5", ] [[package]] -name = "lazy_static" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "libc" -version = "0.2.51" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "linked_list_allocator" -version = "0.6.4" +name = "lock_api" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" dependencies = [ - "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg", + "scopeguard", ] [[package]] -name = "matches" -version = "0.1.8" +name = "log" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "memchr" -version = "2.2.0" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] -name = "percent-encoding" -version = "1.0.1" +name = "once_cell" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "plain" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" [[package]] name = "proc-macro2" -version = "0.4.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "pulldown-cmark" -version = "0.1.2" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ - "bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", - "getopts 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-ident", ] -[[package]] -name = "quine-mc_cluskey" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "quote" -version = "0.6.11" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ - "proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", ] [[package]] name = "raw-cpuid" -version = "2.0.2" +version = "10.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" dependencies = [ - "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 1.3.2", ] [[package]] -name = "raw-cpuid" -version = "4.0.0" +name = "redox-path" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "cc 1.0.35 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "64072665120942deff5fd5425d6c1811b854f4939e7f1c01ce755f64432bbea7" [[package]] name = "redox_syscall" -version = "0.1.56" - -[[package]] -name = "regex" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" +version = "0.5.12" +source = "git+https://gitlab.redox-os.org/redox-os/syscall.git?branch=master#fe32c6b89dae51e609d5c53880dec1834ec9bde0" dependencies = [ - "aho-corasick 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 2.9.0", ] [[package]] -name = "regex-syntax" -version = "0.6.6" +name = "rmm" +version = "0.1.0" + +[[package]] +name = "rustc-cfg" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ddf7a5e441e8003a5a88aab97f1c6113043ddde252d789ef9dea3871b78633a" dependencies = [ - "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "thiserror", ] [[package]] name = "rustc-demangle" -version = "0.1.13" +version = "0.1.24" source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] -name = "rustc_version" -version = "0.2.3" +name = "sbi-rt" +version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fbaa69be1eedc61c426e6d489b2260482e928b465360576900d52d496a58bd0" dependencies = [ - "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "sbi-spec", ] [[package]] -name = "ryu" -version = "0.2.7" +name = "sbi-spec" +version = "0.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e36312fb5ddc10d08ecdc65187402baba4ac34585cb9d1b78522ae2358d890" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "scroll" -version = "0.9.2" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "fda28d4b4830b807a8b43f7b0e6b5df875311b3e7621d84577188c175b6ec1ec" [[package]] -name = "semver" -version = "0.9.0" +name = "serde" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ - "semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive", ] [[package]] -name = "semver-parser" -version = "0.7.0" +name = "serde_derive" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "serde" -version = "1.0.90" +name = "serde_spanned" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" +dependencies = [ + "serde", +] [[package]] -name = "serde_derive" -version = "1.0.90" +name = "shlex" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.30 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] -name = "serde_json" -version = "1.0.39" +name = "slab" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" dependencies = [ - "itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg", ] [[package]] name = "slab_allocator" version = "0.3.1" dependencies = [ - "linked_list_allocator 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "spin 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", + "linked_list_allocator 0.6.6", + "spin 0.4.10", ] -[[package]] -name = "smallvec" -version = "0.6.9" -source = 
"registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "spin" version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ceac490aa12c567115b40b7b7fceca03a6c9d53d5defea066123debc83c5dc1f" [[package]] name = "spin" -version = "0.5.0" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] -name = "syn" -version = "0.15.30" +name = "spin" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" dependencies = [ - "proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lock_api", ] [[package]] -name = "thread_local" -version = "0.3.6" +name = "spinning_top" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b9eb1a2f4c41445a3a0ff9abc5221c5fcd28e1f13cd7c0397706f9ac938ddb0" dependencies = [ - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lock_api", ] [[package]] -name = "toml" -version = "0.4.10" +name = "spinning_top" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96d2d1d716fb500937168cc09353ffdc7a012be8475ac7308e1bdf0e3923300" dependencies = [ - "serde 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)", + "lock_api", ] [[package]] -name = "ucd-util" -version = "0.1.3" +name = "syn" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] [[package]] -name = "unicode-bidi" -version = "0.3.4" +name = "thiserror" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "thiserror-impl", ] [[package]] -name = "unicode-normalization" -version = "0.1.8" +name = "thiserror-impl" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ - "smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "unicode-width" -version = "0.1.5" +name = "toml" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05ae329d1f08c4d17a59bed7ff5b5a769d062e64a62d34a3261b219e62cd5aae" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] [[package]] -name = "unicode-xid" -version = "0.1.0" +name = "toml_datetime" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" +dependencies = [ + "serde", +] [[package]] -name = "url" -version = "1.7.2" +name = "toml_edit" +version = "0.22.26" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" dependencies = [ - "idna 0.1.5 
(registry+https://github.com/rust-lang/crates.io-index)", - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "toml_write", + "winnow", ] [[package]] -name = "utf8-ranges" -version = "1.0.2" +name = "toml_write" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfb942dfe1d8e29a7ee7fcbde5bd2b9a25fb89aa70caea2eba3bee836ff41076" [[package]] -name = "winapi" -version = "0.3.7" +name = "unicode-ident" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "winnow" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9fb597c990f03753e08d3c29efbfcf2019a003b4bf4ba19225c158e1549f0f3" dependencies = [ - "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr", ] [[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" +name = "x86" +version = "0.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55b5be8cc34d017d8aabec95bc45a43d0f20e8b2a31a453cabc804fe996f8dca" +dependencies = [ + "bit_field", + "bitflags 1.3.2", + "raw-cpuid", +] [[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" +name = "zerocopy" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] [[package]] -name = "x86" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "raw-cpuid 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[metadata] -"checksum aho-corasick 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e6f484ae0c99fec2e858eb6134949117399f222608d84cadb3f58c1f97c2364c" -"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -"checksum autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a6d640bee2da49f60a4068a7fae53acde8982514ab7bae8b8cea9e88cbcfd799" -"checksum backtrace 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "f106c02a3604afcdc0df5d36cc47b44b55917dbaf3d808f71c163a0ddba64637" -"checksum backtrace-sys 0.1.28 (registry+https://github.com/rust-lang/crates.io-index)" = "797c830ac25ccc92a7f8a7b9862bde440715531514594a6154e3d4a54dd769b6" -"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d" -"checksum bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4efd02e230a02e18f92fc2735f44597385ed02ad8f831e7c1c1156ee5e1ab3a5" -"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" -"checksum 
cargo_metadata 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)" = "1efca0b863ca03ed4c109fb1c55e0bc4bbeb221d3e103d86251046b06a526bd0" -"checksum cc 1.0.35 (registry+https://github.com/rust-lang/crates.io-index)" = "5e5f3fee5eeb60324c2781f1e41286bdee933850fff9b3c672587fed5ec58c83" -"checksum cfg-if 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "11d43355396e872eefb45ce6342e4374ed7bc2b3a502d1b28e36d6e23c05d1f4" -"checksum clippy 0.0.209 (registry+https://github.com/rust-lang/crates.io-index)" = "fe56cba96f8d67cd3af996bd2c61fbfea263cc555db9180dc1f7413418454c7d" -"checksum clippy_lints 0.0.209 (registry+https://github.com/rust-lang/crates.io-index)" = "891679ac4d0890425ce9aa4db6ab7c05a60506048fb5e0fc2ae2eeaeb02626e8" -"checksum either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5527cfe0d098f36e3f8839852688e63c8fff1c90b2b405aef730615f9a7bcf7b" -"checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3" -"checksum getopts 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "0a7292d30132fb5424b354f5dc02512a86e4c516fe544bb7a25e7f266951b797" -"checksum goblin 0.0.21 (registry+https://github.com/rust-lang/crates.io-index)" = "6a4013e9182f2345c6b7829b9ef6e670bce0dfca12c6f974457ed2160c2c7fe9" -"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" -"checksum if_chain 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "4bac95d9aa0624e7b78187d6fb8ab012b41d9f6f54b1bcb61e61c4845f8357ec" -"checksum itertools 0.7.11 (registry+https://github.com/rust-lang/crates.io-index)" = "0d47946d458e94a1b7bcabbf6521ea7c037062c81f534615abcad76e84d4970d" -"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" -"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" -"checksum libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)" = "bedcc7a809076656486ffe045abeeac163da1b558e963a31e29fbfbeba916917" -"checksum linked_list_allocator 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "47314ec1d29aa869ee7cb5a5be57be9b1055c56567d59c3fb6689926743e0bea" -"checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" -"checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39" -"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" -"checksum plain 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" -"checksum proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)" = "4d317f9caece796be1980837fd5cb3dfec5613ebdb04ad0956deea83ce168915" -"checksum pulldown-cmark 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d6fdf85cda6cadfae5428a54661d431330b312bc767ddbc57adbedc24da66e32" -"checksum quine-mc_cluskey 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "07589615d719a60c8dd8a4622e7946465dfef20d1a428f969e3443e7386d5f45" -"checksum quote 0.6.11 
(registry+https://github.com/rust-lang/crates.io-index)" = "cdd8e04bd9c52e0342b406469d494fcb033be4bdbe5c606016defbb1681411e1" -"checksum raw-cpuid 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "13b844e4049605ff38fed943f5c7b2c691fad68d9d5bf074d2720554c4e48246" -"checksum raw-cpuid 4.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "90e0d3209fac374e168cef2d8806dde7b31ef0ee82a965bcc0bec562c078a6f5" -"checksum regex 1.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "559008764a17de49a3146b234641644ed37d118d1ef641a0bb573d146edc6ce0" -"checksum regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "dcfd8681eebe297b81d98498869d4aae052137651ad7b96822f09ceb690d0a96" -"checksum rustc-demangle 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "adacaae16d02b6ec37fdc7acfcddf365978de76d1983d3ee22afc260e1ca9619" -"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -"checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" -"checksum scroll 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2f84d114ef17fd144153d608fba7c446b0145d038985e7a8cc5d08bb0ce20383" -"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" -"checksum serde 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)" = "aa5f7c20820475babd2c077c3ab5f8c77a31c15e16ea38687b4c02d3e48680f4" -"checksum serde_derive 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)" = "58fc82bec244f168b23d1963b45c8bf5726e9a15a9d146a067f9081aeed2de79" -"checksum serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)" = "5a23aa71d4a4d43fdbfaac00eff68ba8a06a51759a89ac3304323e800c4dd40d" -"checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be" -"checksum spin 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "ceac490aa12c567115b40b7b7fceca03a6c9d53d5defea066123debc83c5dc1f" -"checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" -"checksum syn 0.15.30 (registry+https://github.com/rust-lang/crates.io-index)" = "66c8865bf5a7cbb662d8b011950060b3c8743dca141b054bf7195b20d314d8e2" -"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" -"checksum toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "758664fc71a3a69038656bee8b6be6477d2a6c315a6b81f7081f591bffa4111f" -"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" -"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" -"checksum unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426" -"checksum unicode-width 0.1.5 
(registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" -"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" -"checksum url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a" -"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" -"checksum winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f10e386af2b13e47c89e7236a7a14a086791a2b88ebad6df9bf42040195cf770" -"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -"checksum x86 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "178718d3f2c7dd98d44f8e353b0ccc8c89b2e81e31e5eed93e7fdf5f36db7a13" +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index fc579efd..9ea701a0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,44 +1,91 @@ [package] name = "kernel" -version = "0.1.54" +version = "0.5.12" build = "build.rs" -edition = "2018" +edition = "2021" -[lib] -name = "kernel" -path = "src/lib.rs" -crate-type = ["staticlib"] +[build-dependencies] +cc = "1.0" +rustc-cfg = "0.5" +toml = "0.8" [dependencies] -bitflags = "1.0.3" -clippy = { version = "0.0.209", optional = true } -linked_list_allocator = "0.6.2" -raw-cpuid = "4.0.0" -redox_syscall = { path = "syscall" } +bitflags = "2" +bitfield = "0.13.2" +hashbrown = { version = "0.14.3", default-features = false, features = ["ahash", "inline-more"] } +linked_list_allocator = "0.9.0" +log = "0.4" +redox-path = "0.2.0" +redox_syscall = { git = "https://gitlab.redox-os.org/redox-os/syscall.git", branch = "master", default-features = false } slab_allocator = { path = "slab_allocator", optional = true } -spin = "0.4.8" +spin = "0.9.8" +spinning_top = { version = "0.3", features = ["arc_lock"] } +rmm = { path = "rmm", default-features = false } +arrayvec = { version = "0.7.4", default-features = false } +slab = { version = "0.4", default-features = false } +# TODO: Remove +indexmap = { version = "2.5.0", default-features = false } [dependencies.goblin] -version = "0.0.21" +version = "0.2.1" default-features = false features = ["elf32", "elf64"] [dependencies.rustc-demangle] -version = "0.1.13" +version = "0.1.16" default-features = false -[dependencies.x86] -version = "0.9.0" -default-features = false +[target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64"))'.dependencies] +byteorder = { version = "1", default-features = false } +fdt = { git = "https://github.com/repnop/fdt.git", rev = "2fb1409edd1877c714a0aa36b6a7c5351004be54" } + +[target.'cfg(any(target_arch = "x86", target_arch = "x86_64"))'.dependencies] +raw-cpuid = "10.2.0" +x86 = { version = "0.47.0", default-features = false } + +[target.'cfg(any(target_arch = "riscv64", target_arch = "riscv32"))'.dependencies] +sbi-rt = "0.0.3" [features] 
-default = ["serial_debug"] +default = [ + "acpi", + #TODO: issues with Alder Lake and newer CPUs: "multi_core", + "graphical_debug", + "serial_debug", + "self_modifying", + "x86_kvm_pv", +] + +# Activates some limited code-overwriting optimizations, based on CPU features. +self_modifying = [] + acpi = [] -doc = [] graphical_debug = [] -live = [] -multi_core = [] +lpss_debug = [] +multi_core = ["acpi"] +profiling = [] +#TODO: remove when threading issues are fixed pti = [] qemu_debug = [] serial_debug = [] +system76_ec_debug = [] slab = ["slab_allocator"] +sys_stat = [] +x86_kvm_pv = [] + +debugger = ["syscall_debug"] +syscall_debug = [] + +sys_fdstat = [] + +[profile.dev] +# Avoids having to define the eh_personality lang item and reduces kernel size +panic = "abort" + +[profile.release] +# Avoids having to define the eh_personality lang item and reduces kernel size +panic = "abort" + +lto = true + +debug = "full" diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..5703ee2f --- /dev/null +++ b/Makefile @@ -0,0 +1,44 @@ +SOURCE:=$(dir $(realpath $(lastword $(MAKEFILE_LIST)))) +BUILD?=$(CURDIR) +export RUST_TARGET_PATH=$(SOURCE)/targets + +ifeq ($(TARGET),) + ARCH?=$(shell uname -m) +else + ARCH?=$(shell echo "$(TARGET)" | cut -d - -f1) +endif + +ifeq ($(ARCH),riscv64gc) + override ARCH:=riscv64 +endif +GNU_TARGET=$(ARCH)-unknown-redox + + +all: $(BUILD)/kernel $(BUILD)/kernel.sym + +LD_SCRIPT=$(SOURCE)/linkers/$(ARCH).ld +TARGET_SPEC=$(RUST_TARGET_PATH)/$(ARCH)-unknown-kernel.json + +$(BUILD)/kernel.all: $(LD_SCRIPT) $(TARGET_SPEC) $(shell find $(SOURCE) -name "*.rs" -type f) + cargo rustc \ + --bin kernel \ + --manifest-path "$(SOURCE)/Cargo.toml" \ + --target "$(TARGET_SPEC)" \ + --release \ + -Z build-std=core,alloc \ + -- \ + -C link-arg=-T -Clink-arg="$(LD_SCRIPT)" \ + -C link-arg=-z -Clink-arg=max-page-size=0x1000 \ + --emit link="$(BUILD)/kernel.all" + +$(BUILD)/kernel.sym: $(BUILD)/kernel.all + $(GNU_TARGET)-objcopy \ + --only-keep-debug \ + "$(BUILD)/kernel.all" \ + "$(BUILD)/kernel.sym" + +$(BUILD)/kernel: $(BUILD)/kernel.all + $(GNU_TARGET)-objcopy \ + --strip-debug \ + "$(BUILD)/kernel.all" \ + "$(BUILD)/kernel" diff --git a/README.md b/README.md index 8bf172e7..cf54d099 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,81 @@ -# kernel +# Kernel Redox OS Microkernel +[![docs](https://img.shields.io/badge/docs-master-blue.svg)](https://docs.rs/redox_syscall/latest/syscall/) +[![SLOCs counter](https://tokei.rs/b1/github/redox-os/kernel?category=code)](https://github.com/XAMPPRocky/tokei) [![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE) -[![docs](https://img.shields.io/badge/docs-master-blue.svg)](https://doc.redox-os.org/kernel/kernel/) -[![](https://tokei.rs/b1/github/redox-os/kernel?category=code)](https://github.com/Aaronepower/tokei) -## Debugging the redox kernel +## Requirements -Running [qemu] with the `-s` flag will set up [qemu] to listen on port 1234 for -a [gdb] client to connect to it. To debug the redox kernel run. +* [`nasm`](https://nasm.us/) needs to be available on the PATH at build time. +## Building The Documentation + +Use this command: + +```sh +cargo doc --open --target x86_64-unknown-none ``` -make qemu debug=yes + +## Debugging + +### QEMU + +Running [QEMU](https://www.qemu.org) with the `-s` flag will set up QEMU to listen on port `1234` for a GDB client to connect to it. To debug the redox kernel run. 
+ +```sh +make qemu gdb=yes ``` -This will start a VM with and listen on port 1234 for a [gdb] or [lldb] client. +This will start a virtual machine and listen on port `1234` for a GDB or LLDB client. -## [gdb] +### GDB -If you are going to use [gdb], run the following to load debug symbols and connect -to your running kernel. +If you are going to use [GDB](https://www.gnu.org/software/gdb/), run these commands to load debug symbols and connect to your running kernel: ``` (gdb) symbol-file build/kernel.sym (gdb) target remote localhost:1234 ``` -## [lldb] +### LLDB -If you are going to use [lldb], run the following to start debugging. +If you are going to use [LLDB](https://lldb.llvm.org/), run these commands to start debugging: ``` (lldb) target create -s build/kernel.sym build/kernel (lldb) gdb-remote localhost:1234 ``` -## Debugging - After connecting to your kernel you can set some interesting breakpoints and `continue` the process. See your debugger's man page for more information on useful commands to run. -[qemu]: https://www.qemu.org -[gdb]: https://www.gnu.org/software/gdb/ -[lldb]: https://lldb.llvm.org/ +## Notes + +- Always use `foo.get(n)` instead of `foo[n]` and handle the `Option::None` case. Plain indexing may work fine in applications, but never in the kernel: no possible panic should exist in kernel space, because a panic there stops the whole OS. + +- If you receive a kernel panic in QEMU, use `pkill qemu-system` to kill the frozen QEMU process. + +## How To Contribute + +To learn how to contribute to this system component, read the following document: + +- [CONTRIBUTING.md](https://gitlab.redox-os.org/redox-os/redox/-/blob/master/CONTRIBUTING.md) + +## Development + +To learn how to do development with this system component inside the Redox build system, read the [Build System](https://doc.redox-os.org/book/build-system-reference.html) and [Coding and Building](https://doc.redox-os.org/book/coding-and-building.html) pages. + +### How To Build + +To build this system component, you need to download the Redox build system; you can learn how on the [Building Redox](https://doc.redox-os.org/book/podman-build.html) page. + +This is necessary because this component only builds with cross-compilation for a Redox virtual machine, but you can do some testing from Linux. + +## Funding - _Unix-style Signals and Process Management_ + +This project is funded through [NGI Zero Core](https://nlnet.nl/core), a fund established by [NLnet](https://nlnet.nl) with financial support from the European Commission's [Next Generation Internet](https://ngi.eu) program. Learn more at the [NLnet project page](https://nlnet.nl/project/RedoxOS-Signals). 
+ +[NLnet foundation logo](https://nlnet.nl) +[NGI Zero Logo](https://nlnet.nl/core) diff --git a/Xargo.toml b/Xargo.toml deleted file mode 100644 index ea3144eb..00000000 --- a/Xargo.toml +++ /dev/null @@ -1 +0,0 @@ -[dependencies.alloc] diff --git a/build.rs b/build.rs index acc24d22..2b326211 100644 --- a/build.rs +++ b/build.rs @@ -1,124 +1,90 @@ -use std::env; -use std::fs; -use std::io::{Error, Write}; -use std::path::Path; -use std::collections::HashMap; +use rustc_cfg::Cfg; +use std::{env, path::Path, process::Command}; +use toml::Table; +fn parse_kconfig(arch: &str) -> Option<()> { + println!("cargo:rerun-if-changed=config.toml"); -// View loc folder with subfolders, get listings -// Returns touple (folder_map, file_list) -// folder_map keys are folders, and values are lists of direct childs -// file_list is a vector of all detected files with full path -fn scan_folder(loc: &Path) -> (HashMap>, Vec) { - let mut folders: HashMap> = HashMap::new(); - let mut files: Vec = Vec::new(); - let mut current = Vec::new(); - - if loc.is_dir() { - for entry in fs::read_dir(loc).unwrap() { - let entry = entry.unwrap(); - let path = entry.path(); - let path_str = String::from(path.to_str().unwrap()).replace("\\", "/"); - - current.push(path_str.clone()); - - // if folder then scan recursively - if path.is_dir() { - let (d, mut f) = scan_folder(&path); - for (key, value) in d.into_iter() { - folders.insert(key, value); - } - - files.append(&mut f); - } else { - files.push(path_str); - } - } - - current.sort(); - folders.entry(String::from(loc.to_str().unwrap()).replace("\\", "/")).or_insert(current); - } else { - panic!("{:?} is not a folder!", loc); - } - - (folders, files) -} - -// Write folder/file information to output file -fn fill_from_location(f: &mut fs::File, loc: &Path ) -> Result<(), (Error)> { - let (folders, mut files) = scan_folder(loc); - let mut folder_it:Vec<_> = folders.keys().collect(); - - let loc_str = loc.to_str().unwrap(); - let mut idx = loc_str.len(); - - if !loc_str.ends_with("/") { - idx += 1; + assert!(Path::new("config.toml.example").try_exists().unwrap()); + if !Path::new("config.toml").try_exists().unwrap() { + std::fs::copy("config.toml.example", "config.toml").unwrap(); } - - folder_it.sort(); - files.sort(); - for dir in folder_it.iter() { - let strip: String = dir.chars().skip(idx).collect(); - write!(f, " files.insert(b\"{}\", (b\"", strip)?; - - // Write child elements separated with \n - let sub = folders.get(*dir).unwrap(); - let mut first = true; - for child in sub.iter() { - let idx = child.rfind('/').unwrap() + 1; - let (_, c) = child.split_at(idx); - if first { - write!(f, "{}", c)?; - first = false; - } else { - write!(f, "\\n{}", c)?; - } + let config_str = std::fs::read_to_string("config.toml").unwrap(); + let root: Table = toml::from_str(&config_str).unwrap(); + + let altfeatures = root + .get("arch")? + .as_table() + .unwrap() + .get(arch)? + .as_table() + .unwrap() + .get("features")? 
+ .as_table() + .unwrap(); + + let self_modifying = env::var("CARGO_FEATURE_SELF_MODIFYING").is_ok(); + + for (name, value) in altfeatures { + let mut choice = value.as_str().unwrap(); + assert!(matches!(choice, "always" | "never" | "auto")); + + if !self_modifying && choice == "auto" { + choice = "never"; } - write!(f, "\", true));\n")?; - } - for name in files.iter() { - let (_, strip) = name.split_at(idx); - write!(f, " files.insert(b\"{}\", (include_bytes!(\"{}\"), false));\n", strip, name)?; + println!("cargo:rustc-cfg=cpu_feature_{choice}=\"{name}\""); } - Ok(()) + Some(()) } fn main() { println!("cargo:rustc-env=TARGET={}", env::var("TARGET").unwrap()); - println!("cargo:rerun-if-env-changed=INITFS_FOLDER"); let out_dir = env::var("OUT_DIR").unwrap(); - let dest_path = Path::new(&out_dir).join("gen.rs"); - let mut f = fs::File::create(&dest_path).unwrap(); - let src = env::var("INITFS_FOLDER"); - - // Write header - f.write_all(b" -mod gen { - use alloc::collections::BTreeMap; - pub fn gen() -> BTreeMap<&'static [u8], (&'static [u8], bool)> { - let mut files: BTreeMap<&'static [u8], (&'static [u8], bool)> = BTreeMap::new(); -").unwrap(); + let cfg = Cfg::of(env::var("TARGET").unwrap().as_str()).unwrap(); + let arch_str = cfg.target_arch.as_str(); - match src { - Ok(v) => { - println!("cargo:rerun-if-changed={}", v); - fill_from_location(&mut f, Path::new(&v)).unwrap() - }, - Err(e) => { - f.write_all( - b" files.clear();" // Silence mutability warning - ).unwrap(); - println!("cargo:warning=location not found: {}, please set proper INITFS_FOLDER.", e); + match arch_str { + "aarch64" => { + println!("cargo:rustc-cfg=dtb"); + } + "x86" => { + println!("cargo:rerun-if-changed=src/asm/x86/trampoline.asm"); + + let status = Command::new("nasm") + .arg("-f") + .arg("bin") + .arg("-o") + .arg(format!("{}/trampoline", out_dir)) + .arg("src/asm/x86/trampoline.asm") + .status() + .expect("failed to run nasm"); + if !status.success() { + panic!("nasm failed with exit status {}", status); + } + } + "x86_64" => { + println!("cargo:rerun-if-changed=src/asm/x86_64/trampoline.asm"); + + let status = Command::new("nasm") + .arg("-f") + .arg("bin") + .arg("-o") + .arg(format!("{}/trampoline", out_dir)) + .arg("src/asm/x86_64/trampoline.asm") + .status() + .expect("failed to run nasm"); + if !status.success() { + panic!("nasm failed with exit status {}", status); + } } + "riscv64" => { + println!("cargo:rustc-cfg=dtb"); + } + _ => (), } - f.write_all(b" - files - } -} -").unwrap(); + let _ = parse_kconfig(arch_str); } diff --git a/clippy.sh b/clippy.sh index f128f482..655f1b89 100755 --- a/clippy.sh +++ b/clippy.sh @@ -3,15 +3,5 @@ set -e export RUST_TARGET_PATH="${PWD}/targets" -xargo rustc --lib --release \ - --target x86_64-unknown-none \ - --features clippy \ - -- \ - -C soft-float -C debuginfo=2 \ - -W anonymous-parameters \ - -W trivial-numeric-casts \ - -W unused-extern-crates \ - -W unused-import-braces \ - -W unused-qualifications \ - -W variant-size-differences \ - -Z no-trans -Z extra-plugins=clippy +export RUSTFLAGS="-C soft-float -C debuginfo=2" +cargo clippy --lib --release --target x86_64-unknown-none "$@" diff --git a/config.toml.example b/config.toml.example new file mode 100644 index 00000000..b3cdb9e7 --- /dev/null +++ b/config.toml.example @@ -0,0 +1,7 @@ +[arch.x86_64.features] +smap = "auto" +fsgsbase = "auto" +xsave = "auto" +xsaveopt = "auto" + +# vim: ft=toml diff --git a/linkers/aarch64.ld b/linkers/aarch64.ld new file mode 100644 index 00000000..f679c9af --- /dev/null +++ 
b/linkers/aarch64.ld @@ -0,0 +1,62 @@ +ENTRY(kstart) +OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64") + +KERNEL_OFFSET = 0xFFFFFF0000000000; + +SECTIONS { + . = KERNEL_OFFSET; + + . += SIZEOF_HEADERS; + + /* Force the zero page to be part of a segment by creating a + * dummy section in the zero page. + * Limine will map the segment with the lowest vaddr value at + * 0xFFFFFFFF80000000 even if the segment has a higher vaddr. + * As such without the zero page being part of a segment, the + * kernel would be loaded at an offset from the expected + * location. As the redox kernel is not currently relocatable, + * this would result in a crash. A similar issue likely exists + * with multiboot/multiboot2 and the paddr of the segment. + */ + .dummy ALIGN(8) : AT(ADDR(.dummy) - KERNEL_OFFSET) {} + + . = ALIGN(4096); + + .text : AT(ADDR(.text) - KERNEL_OFFSET) { + __text_start = .; + *(.text*) + __usercopy_start = .; + *(.usercopy-fns) + __usercopy_end = .; + . = ALIGN(4096); + __text_end = .; + } + + .rodata : AT(ADDR(.rodata) - KERNEL_OFFSET) { + __rodata_start = .; + *(.rodata*) + . = ALIGN(4096); + __rodata_end = .; + } + + .data : AT(ADDR(.data) - KERNEL_OFFSET) { + __data_start = .; + *(.data*) + . = ALIGN(4096); + __data_end = .; + __bss_start = .; + *(.bss*) + . = ALIGN(4096); + __bss_end = .; + } + + __end = .; + + /DISCARD/ : { + *(.comment*) + *(.eh_frame*) + *(.gcc_except_table*) + *(.note*) + *(.rel.eh_frame*) + } +} diff --git a/linkers/i686.ld b/linkers/i686.ld new file mode 100644 index 00000000..f7eb9c42 --- /dev/null +++ b/linkers/i686.ld @@ -0,0 +1,57 @@ +ENTRY(kstart) +OUTPUT_FORMAT(elf32-i386) + +KERNEL_OFFSET = 0xC0000000; + +SECTIONS { + . = KERNEL_OFFSET; + + . += SIZEOF_HEADERS; + + /* Force the zero page to be part of a segment by creating a + * dummy section in the zero page. + * Limine will map the segment with the lowest vaddr value at + * 0xFFFFFFFF80000000 even if the segment has a higher vaddr. + * As such without the zero page being part of a segment, the + * kernel would be loaded at an offset from the expected + * location. As the redox kernel is not currently relocatable, + * this would result in a crash. A similar issue likely exists + * with multiboot/multiboot2 and the paddr of the segment. + */ + .dummy : AT(ADDR(.dummy) - KERNEL_OFFSET) {} + + .text ALIGN(4K) : AT(ADDR(.text) - KERNEL_OFFSET) { + __text_start = .; + *(.text*) + __usercopy_start = .; + *(.usercopy-fns) + __usercopy_end = .; + } + + .rodata ALIGN(4K) : AT(ADDR(.rodata) - KERNEL_OFFSET) { + __text_end = .; + __rodata_start = .; + *(.rodata*) + } + + .data ALIGN(4K) : AT(ADDR(.data) - KERNEL_OFFSET) { + __rodata_end = .; + __data_start = .; + *(.data*) + . = ALIGN(4K); + __data_end = .; + __bss_start = .; + *(.bss*) + . = ALIGN(4K); + } + + __end = .; + + /DISCARD/ : { + *(.comment*) + *(.eh_frame*) + *(.gcc_except_table*) + *(.note*) + *(.rel.eh_frame*) + } +} diff --git a/linkers/riscv64.ld b/linkers/riscv64.ld new file mode 100644 index 00000000..634b5c2c --- /dev/null +++ b/linkers/riscv64.ld @@ -0,0 +1,68 @@ +ENTRY(kstart) +OUTPUT_FORMAT("elf64-littleriscv", "elf64-littleriscv", "elf64-littleriscv" ) + +KERNEL_OFFSET = 0xFFFFFF0000000000; + +SECTIONS { + . = KERNEL_OFFSET; + + . += SIZEOF_HEADERS; + + /* Force the zero page to be part of a segment by creating a + * dummy section in the zero page. + * Linker will map the segment with the lowest vaddr value at + * 0xFFFFFF0000000000 even if the segment has a higher vaddr. 
+ * As such without the zero page being part of a segment, the + * kernel would be loaded at an offset from the expected + * location. As the redox kernel is not currently relocatable, + * this would result in a crash. A similar issue likely exists + * with multiboot/multiboot2 and the paddr of the segment. + */ + .dummy ALIGN(8) : AT(ADDR(.dummy) - KERNEL_OFFSET) {} + + . = ALIGN(4096); + + .text : AT(ADDR(.text) - KERNEL_OFFSET) { + __text_start = .; + *(.early_init.text*) + . = ALIGN(4096); + *(.text*) + __usercopy_start = .; + *(.usercopy-fns) + __usercopy_end = .; + . = ALIGN(4096); + __text_end = .; + } + + .rodata : AT(ADDR(.rodata) - KERNEL_OFFSET) { + __rodata_start = .; + *(.rodata*) + . = ALIGN(4096); + __rodata_end = .; + } + + .data : AT(ADDR(.data) - KERNEL_OFFSET) { + __data_start = .; + *(.data*) + *(.sdata*) + . = ALIGN(4096); + __data_end = .; + *(.got*) + . = ALIGN(4096); + __bss_start = .; + *(.bss*) + *(.sbss*) + . = ALIGN(4096); + __bss_end = .; + } + + __end = .; + + /DISCARD/ : { + *(.comment*) + *(.eh_frame*) + *(.gcc_except_table*) + *(.note*) + *(.rel.eh_frame*) + } +} diff --git a/linkers/x86_64.ld b/linkers/x86_64.ld index e88c9d07..6610509a 100644 --- a/linkers/x86_64.ld +++ b/linkers/x86_64.ld @@ -1,49 +1,57 @@ ENTRY(kstart) OUTPUT_FORMAT(elf64-x86-64) -KERNEL_OFFSET = 0xffffff0000100000; +KERNEL_OFFSET = 0xFFFFFFFF80000000; SECTIONS { . = KERNEL_OFFSET; . += SIZEOF_HEADERS; - . = ALIGN(4096); - .text : AT(ADDR(.text) - KERNEL_OFFSET) { + /* Force the zero page to be part of a segment by creating a + * dummy section in the zero page. + * Limine will map the segment with the lowest vaddr value at + * 0xFFFFFFFF80000000 even if the segment has a higher vaddr. + * As such without the zero page being part of a segment, the + * kernel would be loaded at an offset from the expected + * location. As the redox kernel is not currently relocatable, + * this would result in a crash. A similar issue likely exists + * with multiboot/multiboot2 and the paddr of the segment. + */ + .dummy : AT(ADDR(.dummy) - KERNEL_OFFSET) {} + + .text ALIGN(4K) : AT(ADDR(.text) - KERNEL_OFFSET) { __text_start = .; *(.text*) - . = ALIGN(4096); - __text_end = .; + __usercopy_start = .; + *(.usercopy-fns) + __usercopy_end = .; } - .rodata : AT(ADDR(.rodata) - KERNEL_OFFSET) { + .rodata ALIGN(4K) : AT(ADDR(.rodata) - KERNEL_OFFSET) { + __text_end = .; __rodata_start = .; *(.rodata*) - . = ALIGN(4096); - __rodata_end = .; + __altcode_start = .; + KEEP(*(.altcode*)) + __altcode_end = .; + . = ALIGN(8); + __altrelocs_start = .; + KEEP(*(.altrelocs*)) + __altrelocs_end = .; + __altfeatures_start = .; + KEEP(*(.altfeatures*)) + __altfeatures_end = .; } - .data : AT(ADDR(.data) - KERNEL_OFFSET) { + .data ALIGN(4K) : AT(ADDR(.data) - KERNEL_OFFSET) { + __rodata_end = .; __data_start = .; *(.data*) - . = ALIGN(4096); + . = ALIGN(4K); __data_end = .; __bss_start = .; *(.bss*) - . = ALIGN(4096); - __bss_end = .; - } - - .tdata : AT(ADDR(.tdata) - KERNEL_OFFSET) { - __tdata_start = .; - *(.tdata*) - . = ALIGN(4096); - __tdata_end = .; - __tbss_start = .; - *(.tbss*) - . += 8; - . 
= ALIGN(4096); - __tbss_end = .; } __end = .; diff --git a/rmm b/rmm new file mode 160000 index 00000000..63669069 --- /dev/null +++ b/rmm @@ -0,0 +1 @@ +Subproject commit 63669069f4eb2e3bf04c0399d28fb2e3e3c58166 diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 00000000..becbe9e6 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,21 @@ +blank_lines_lower_bound = 0 # default +blank_lines_upper_bound = 1 # default +brace_style = "SameLineWhere" # default +disable_all_formatting = false # default +edition = "2021" +empty_item_single_line = true # default +fn_single_line = false # default +force_explicit_abi = true # default +format_strings = false # default +hard_tabs = false # default +show_parse_errors = true # default +imports_granularity = "Crate" # default = Preserve +imports_indent = "Block" # default +imports_layout = "Mixed" # default +indent_style = "Block" # default +max_width = 100 # default +newline_style = "Unix" # default = Auto +skip_children = false # default +tab_spaces = 4 # default +trailing_comma = "Vertical" # default +where_single_line = false # default diff --git a/src/acpi/aml/dataobj.rs b/src/acpi/aml/dataobj.rs deleted file mode 100644 index e0f26211..00000000 --- a/src/acpi/aml/dataobj.rs +++ /dev/null @@ -1,187 +0,0 @@ -use alloc::vec::Vec; -use alloc::string::String; - -use super::AmlError; -use super::parser::{ AmlParseType, ParseResult, AmlExecutionContext, ExecutionState }; -use super::namespace::{ AmlValue, ObjectReference }; - -use super::type2opcode::{parse_def_buffer, parse_def_package, parse_def_var_package}; -use super::termlist::parse_term_arg; -use super::namestring::parse_super_name; - -pub fn parse_data_obj(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_selector! { - data, ctx, - parse_computational_data, - parse_def_package, - parse_def_var_package - }; - - Err(AmlError::AmlInvalidOpCode) -} - -pub fn parse_data_ref_obj(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_selector! { - data, ctx, - parse_data_obj, - parse_term_arg - }; - - match parse_super_name(data, ctx) { - Ok(res) => match res.val { - AmlValue::String(s) => Ok(AmlParseType { - val: AmlValue::ObjectReference(ObjectReference::Object(s)), - len: res.len - }), - _ => Ok(res) - }, - Err(e) => Err(e) - } -} - -pub fn parse_arg_obj(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - match data[0] { - 0x68 ... 0x6E => Ok(AmlParseType { - val: AmlValue::ObjectReference(ObjectReference::ArgObj(data[0] - 0x68)), - len: 1 as usize - }), - _ => Err(AmlError::AmlInvalidOpCode) - } -} - -pub fn parse_local_obj(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - match data[0] { - 0x68 ... 
0x6E => Ok(AmlParseType { - val: AmlValue::ObjectReference(ObjectReference::LocalObj(data[0] - 0x60)), - len: 1 as usize - }), - _ => Err(AmlError::AmlInvalidOpCode) - } -} - -fn parse_computational_data(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - match data[0] { - 0x0A => Ok(AmlParseType { - val: AmlValue::Integer(data[1] as u64), - len: 2 as usize - }), - 0x0B => { - let res = (data[1] as u16) + - ((data[2] as u16) << 8); - - Ok(AmlParseType { - val: AmlValue::Integer(res as u64), - len: 3 as usize - }) - }, - 0x0C => { - let res = (data[1] as u32) + - ((data[2] as u32) << 8) + - ((data[3] as u32) << 16) + - ((data[4] as u32) << 24); - - Ok(AmlParseType { - val: AmlValue::Integer(res as u64), - len: 5 as usize - }) - }, - 0x0D => { - let mut cur_ptr: usize = 1; - let mut cur_string: Vec = vec!(); - - while data[cur_ptr] != 0x00 { - cur_string.push(data[cur_ptr]); - cur_ptr += 1; - } - - match String::from_utf8(cur_string) { - Ok(s) => Ok(AmlParseType { - val: AmlValue::String(s.clone()), - len: s.clone().len() + 2 - }), - Err(_) => Err(AmlError::AmlParseError("String data - invalid string")) - } - }, - 0x0E => { - let res = (data[1] as u64) + - ((data[2] as u64) << 8) + - ((data[3] as u64) << 16) + - ((data[4] as u64) << 24) + - ((data[5] as u64) << 32) + - ((data[6] as u64) << 40) + - ((data[7] as u64) << 48) + - ((data[8] as u64) << 56); - - Ok(AmlParseType { - val: AmlValue::Integer(res as u64), - len: 9 as usize - }) - }, - 0x00 => Ok(AmlParseType { - val: AmlValue::IntegerConstant(0 as u64), - len: 1 as usize - }), - 0x01 => Ok(AmlParseType { - val: AmlValue::IntegerConstant(1 as u64), - len: 1 as usize - }), - 0x5B => if data[1] == 0x30 { - Ok(AmlParseType { - val: AmlValue::IntegerConstant(2017_0630 as u64), - len: 2 as usize - }) - } else { - Err(AmlError::AmlInvalidOpCode) - }, - 0xFF => Ok(AmlParseType { - val: AmlValue::IntegerConstant(0xFFFF_FFFF_FFFF_FFFF), - len: 1 as usize - }), - _ => parse_def_buffer(data, ctx) - } -} diff --git a/src/acpi/aml/mod.rs b/src/acpi/aml/mod.rs deleted file mode 100644 index 332e5fbb..00000000 --- a/src/acpi/aml/mod.rs +++ /dev/null @@ -1,57 +0,0 @@ -//! # AML -//! 
Code to parse and execute AML tables - -use alloc::string::String; -use alloc::vec::Vec; -use core::str::FromStr; - -use super::sdt::Sdt; - -#[macro_use] -mod parsermacros; - -mod namespace; -mod termlist; -mod namespacemodifier; -mod pkglength; -mod namestring; -mod namedobj; -mod dataobj; -mod type1opcode; -mod type2opcode; -mod parser; - -use self::parser::AmlExecutionContext; -use self::termlist::parse_term_list; -pub use self::namespace::AmlValue; - -#[derive(Debug)] -pub enum AmlError { - AmlParseError(&'static str), - AmlInvalidOpCode, - AmlValueError, - AmlDeferredLoad, - AmlFatalError(u8, u16, AmlValue), - AmlHardFatal -} - -pub fn parse_aml_table(sdt: &Sdt) -> Result, AmlError> { - parse_aml_with_scope(sdt, String::from_str("\\").unwrap()) -} - -pub fn parse_aml_with_scope(sdt: &Sdt, scope: String) -> Result, AmlError> { - let data = sdt.data(); - let mut ctx = AmlExecutionContext::new(scope); - - parse_term_list(data, &mut ctx)?; - - Ok(ctx.namespace_delta) -} - -pub fn is_aml_table(sdt: &Sdt) -> bool { - if &sdt.signature == b"DSDT" || &sdt.signature == b"SSDT" { - true - } else { - false - } -} diff --git a/src/acpi/aml/namedobj.rs b/src/acpi/aml/namedobj.rs deleted file mode 100644 index 8422b419..00000000 --- a/src/acpi/aml/namedobj.rs +++ /dev/null @@ -1,1045 +0,0 @@ -use alloc::boxed::Box; -use alloc::string::String; -use alloc::btree_map::BTreeMap; - -use super::AmlError; -use super::parser::{ AmlParseType, ParseResult, AmlParseTypeGeneric, AmlExecutionContext, ExecutionState }; -use super::namespace::{AmlValue, FieldSelector, Method, get_namespace_string, - Accessor, BufferField, FieldUnit, Processor, PowerResource, OperationRegion, - Device, ThermalZone}; -use super::namestring::{parse_name_string, parse_name_seg}; -use super::termlist::{parse_term_arg, parse_object_list}; -use super::pkglength::parse_pkg_length; -use super::type2opcode::parse_def_buffer; - -#[derive(Debug, Copy, Clone)] -pub enum RegionSpace { - SystemMemory, - SystemIO, - PCIConfig, - EmbeddedControl, - SMBus, - SystemCMOS, - PciBarTarget, - IPMI, - GeneralPurposeIO, - GenericSerialBus, - UserDefined(u8) -} - -#[derive(Debug, Clone)] -pub struct FieldFlags { - access_type: AccessType, - lock_rule: bool, - update_rule: UpdateRule -} - -#[derive(Debug, Clone)] -pub enum AccessType { - AnyAcc, - ByteAcc, - WordAcc, - DWordAcc, - QWordAcc, - BufferAcc(AccessAttrib) -} - -#[derive(Debug, Clone)] -pub enum UpdateRule { - Preserve, - WriteAsOnes, - WriteAsZeros -} - -#[derive(Debug, Clone)] -pub struct NamedField { - name: String, - length: usize -} - -#[derive(Debug, Clone)] -pub struct AccessField { - access_type: AccessType, - access_attrib: AccessAttrib -} - -#[derive(Debug, Clone)] -pub enum AccessAttrib { - AttribBytes(u8), - AttribRawBytes(u8), - AttribRawProcessBytes(u8), - AttribQuick, - AttribSendReceive, - AttribByte, - AttribWord, - AttribBlock, - AttribProcessCall, - AttribBlockProcessCall -} - -pub fn parse_named_obj(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_selector! 
{ - data, ctx, - parse_def_bank_field, - parse_def_create_bit_field, - parse_def_create_byte_field, - parse_def_create_word_field, - parse_def_create_dword_field, - parse_def_create_qword_field, - parse_def_create_field, - parse_def_data_region, - parse_def_event, - parse_def_external, - parse_def_device, - parse_def_op_region, - parse_def_field, - parse_def_index_field, - parse_def_method, - parse_def_mutex, - parse_def_power_res, - parse_def_processor, - parse_def_thermal_zone - }; - - Err(AmlError::AmlInvalidOpCode) -} - -fn parse_def_bank_field(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 3 { - return Err(AmlError::AmlParseError("DefBankField - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x87); - - let (pkg_length, pkg_length_len) = parse_pkg_length(&data[2..])?; - let data = &data[2 + pkg_length_len .. 2 + pkg_length]; - - let region_name = parse_name_string(data, ctx)?; - let bank_name = parse_name_string(&data[2 + pkg_length_len + region_name.len .. 2 + pkg_length], ctx)?; - - let bank_value = parse_term_arg(&data[2 + pkg_length_len + region_name.len .. 2 + pkg_length], ctx)?; - - let flags_raw = data[2 + pkg_length_len + region_name.len + bank_name.len + bank_value.len]; - let mut flags = FieldFlags { - access_type: match flags_raw & 0x0F { - 0 => AccessType::AnyAcc, - 1 => AccessType::ByteAcc, - 2 => AccessType::WordAcc, - 3 => AccessType::DWordAcc, - 4 => AccessType::QWordAcc, - 5 => AccessType::BufferAcc(AccessAttrib::AttribByte), - _ => return Err(AmlError::AmlParseError("BankField - invalid access type")) - }, - lock_rule: (flags_raw & 0x10) == 0x10, - update_rule: match (flags_raw & 0x60) >> 5 { - 0 => UpdateRule::Preserve, - 1 => UpdateRule::WriteAsOnes, - 2 => UpdateRule::WriteAsZeros, - _ => return Err(AmlError::AmlParseError("BankField - invalid update rule")) - } - }; - - let selector = FieldSelector::Bank { - region: region_name.val.get_as_string()?, - bank_register: bank_name.val.get_as_string()?, - bank_selector: Box::new(bank_value.val) - }; - - parse_field_list(&data[3 + pkg_length_len + region_name.len + bank_name.len + bank_value.len .. 
- 2 + pkg_length], ctx, selector, &mut flags)?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + pkg_length - }) -} - -fn parse_def_create_bit_field(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 2 { - return Err(AmlError::AmlParseError("DefCreateBitField - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x8D); - - let source_buf = parse_term_arg(&data[2..], ctx)?; - let bit_index = parse_term_arg(&data[2 + source_buf.len..], ctx)?; - let name = parse_name_string(&data[1 + source_buf.len + bit_index.len..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - - ctx.add_to_namespace(local_scope_string, AmlValue::BufferField(BufferField { - source_buf: Box::new(source_buf.val), - index: Box::new(bit_index.val), - length: Box::new(AmlValue::IntegerConstant(1)) - }))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + name.len + source_buf.len + bit_index.len - }) -} - -fn parse_def_create_byte_field(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 2 { - return Err(AmlError::AmlParseError("DefCreateByteField - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x8C); - - let source_buf = parse_term_arg(&data[2..], ctx)?; - let bit_index = parse_term_arg(&data[2 + source_buf.len..], ctx)?; - let name = parse_name_string(&data[1 + source_buf.len + bit_index.len..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - - ctx.add_to_namespace(local_scope_string, AmlValue::BufferField(BufferField { - source_buf: Box::new(source_buf.val), - index: Box::new(bit_index.val), - length: Box::new(AmlValue::IntegerConstant(8)) - }))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + name.len + source_buf.len + bit_index.len - }) -} - -fn parse_def_create_word_field(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 2 { - return Err(AmlError::AmlParseError("DefCreateWordField - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x8B); - - let source_buf = parse_term_arg(&data[2..], ctx)?; - let bit_index = parse_term_arg(&data[2 + source_buf.len..], ctx)?; - let name = parse_name_string(&data[1 + source_buf.len + bit_index.len..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - - ctx.add_to_namespace(local_scope_string, AmlValue::BufferField(BufferField { - source_buf: Box::new(source_buf.val), - index: Box::new(bit_index.val), - length: Box::new(AmlValue::IntegerConstant(16)) - }))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + name.len + source_buf.len + bit_index.len - }) -} - -fn parse_def_create_dword_field(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 2 { - return Err(AmlError::AmlParseError("DefCreateDwordField - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x8A); - - let source_buf = parse_term_arg(&data[2..], ctx)?; - let bit_index = parse_term_arg(&data[2 + source_buf.len..], ctx)?; - let name = parse_name_string(&data[1 + 
source_buf.len + bit_index.len..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - - let _ = ctx.add_to_namespace(local_scope_string, AmlValue::BufferField(BufferField { - source_buf: Box::new(source_buf.val), - index: Box::new(bit_index.val), - length: Box::new(AmlValue::IntegerConstant(32)) - })); - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + name.len + source_buf.len + bit_index.len - }) -} - -fn parse_def_create_qword_field(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 2 { - return Err(AmlError::AmlParseError("DefCreateQwordField - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x8F); - - let source_buf = parse_term_arg(&data[2..], ctx)?; - let bit_index = parse_term_arg(&data[2 + source_buf.len..], ctx)?; - let name = parse_name_string(&data[1 + source_buf.len + bit_index.len..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - - ctx.add_to_namespace(local_scope_string, AmlValue::BufferField(BufferField { - source_buf: Box::new(source_buf.val), - index: Box::new(bit_index.val), - length: Box::new(AmlValue::IntegerConstant(64)) - }))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + name.len + source_buf.len + bit_index.len - }) -} - -fn parse_def_create_field(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 2 { - return Err(AmlError::AmlParseError("DefCreateField - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x13); - - let source_buf = parse_term_arg(&data[2..], ctx)?; - let bit_index = parse_term_arg(&data[2 + source_buf.len..], ctx)?; - let num_bits = parse_term_arg(&data[2 + source_buf.len + bit_index.len..], ctx)?; - let name = parse_name_string(&data[2 + source_buf.len + bit_index.len + num_bits.len..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - - ctx.add_to_namespace(local_scope_string, AmlValue::BufferField(BufferField { - source_buf: Box::new(source_buf.val), - index: Box::new(bit_index.val), - length: Box::new(num_bits.val) - }))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + name.len + source_buf.len + bit_index.len + num_bits.len - }) -} - -fn parse_def_data_region(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 2 { - return Err(AmlError::AmlParseError("DefDataRegion - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - // TODO: Find the actual offset and length, once table mapping is implemented - parser_opcode_extended!(data, 0x88); - - let name = parse_name_string(&data[2..], ctx)?; - let signature = parse_term_arg(&data[2 + name.len..], ctx)?; - let oem_id = parse_term_arg(&data[2 + name.len + signature.len..], ctx)?; - let oem_table_id = parse_term_arg(&data[2 + name.len + signature.len + oem_id.len..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - - ctx.add_to_namespace(local_scope_string, AmlValue::OperationRegion(OperationRegion { - region: RegionSpace::SystemMemory, - offset: Box::new(AmlValue::IntegerConstant(0)), - len: Box::new(AmlValue::IntegerConstant(0)), - accessor: Accessor { - read: |_x| 0 as 
u64, - write: |_x, _y| () - }, - accessed_by: None - }))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + name.len + signature.len + oem_id.len + oem_table_id.len - }) -} - -fn parse_def_event(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 2 { - return Err(AmlError::AmlParseError("DefEvent - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x02); - - let name = parse_name_string(&data[2..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - ctx.add_to_namespace(local_scope_string, AmlValue::Event(0))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + name.len - }) -} - -fn parse_def_device(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 2 { - return Err(AmlError::AmlParseError("DefDevice - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - // TODO: How to handle local context deferreds - parser_opcode_extended!(data, 0x82); - - let (pkg_length, pkg_length_len) = parse_pkg_length(&data[2..])?; - let name = parse_name_string(&data[2 + pkg_length_len .. 2 + pkg_length], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - let mut local_ctx = AmlExecutionContext::new(local_scope_string.clone()); - - parse_object_list(&data[2 + pkg_length_len + name.len .. 2 + pkg_length], &mut local_ctx)?; - - ctx.add_to_namespace(local_scope_string, AmlValue::Device(Device { - obj_list: local_ctx.namespace_delta.clone(), - notify_methods: BTreeMap::new() - }))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + pkg_length - }) -} - -fn parse_def_op_region(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 3 { - return Err(AmlError::AmlParseError("DefOpRegion - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x80); - - let name = parse_name_string(&data[2..], ctx)?; - let region = match data[2 + name.len] { - 0x00 => RegionSpace::SystemMemory, - 0x01 => RegionSpace::SystemIO, - 0x02 => RegionSpace::PCIConfig, - 0x03 => RegionSpace::EmbeddedControl, - 0x04 => RegionSpace::SMBus, - 0x05 => RegionSpace::SystemCMOS, - 0x06 => RegionSpace::PciBarTarget, - 0x07 => RegionSpace::IPMI, - 0x08 => RegionSpace::GeneralPurposeIO, - 0x09 => RegionSpace::GenericSerialBus, - 0x80 ... 
0xFF => RegionSpace::UserDefined(data[2 + name.len]), - _ => return Err(AmlError::AmlParseError("OpRegion - invalid region")) - }; - - let offset = parse_term_arg(&data[3 + name.len..], ctx)?; - let len = parse_term_arg(&data[3 + name.len + offset.len..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - ctx.add_to_namespace(local_scope_string, AmlValue::OperationRegion(OperationRegion { - region: region, - offset: Box::new(offset.val), - len: Box::new(len.val), - accessor: Accessor { - read: |_x| 0 as u64, - write: |_x, _y| () - }, - accessed_by: None - }))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 3 + name.len + offset.len + len.len - }) -} - -fn parse_def_field(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 3 { - return Err(AmlError::AmlParseError("DefField - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x81); - - let (pkg_length, pkg_length_len) = parse_pkg_length(&data[2..])?; - let name = parse_name_string(&data[2 + pkg_length_len .. 2 + pkg_length], ctx)?; - - let flags_raw = data[2 + pkg_length_len + name.len]; - let mut flags = FieldFlags { - access_type: match flags_raw & 0x0F { - 0 => AccessType::AnyAcc, - 1 => AccessType::ByteAcc, - 2 => AccessType::WordAcc, - 3 => AccessType::DWordAcc, - 4 => AccessType::QWordAcc, - 5 => AccessType::BufferAcc(AccessAttrib::AttribByte), - _ => return Err(AmlError::AmlParseError("Field - Invalid access type")) - }, - lock_rule: (flags_raw & 0x10) == 0x10, - update_rule: match (flags_raw & 0x60) >> 5 { - 0 => UpdateRule::Preserve, - 1 => UpdateRule::WriteAsOnes, - 2 => UpdateRule::WriteAsZeros, - _ => return Err(AmlError::AmlParseError("Field - Invalid update rule")) - } - }; - - let selector = FieldSelector::Region(name.val.get_as_string()?); - - parse_field_list(&data[3 + pkg_length_len + name.len .. 2 + pkg_length], ctx, selector, &mut flags)?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + pkg_length - }) -} - -fn parse_def_index_field(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 3 { - return Err(AmlError::AmlParseError("DefIndexField - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x86); - - let (pkg_length, pkg_length_len) = parse_pkg_length(&data[2..])?; - let idx_name = parse_name_string(&data[2 + pkg_length_len .. 2 + pkg_length], ctx)?; - let data_name = parse_name_string(&data[2 + pkg_length_len + idx_name.len .. 
2 + pkg_length], ctx)?; - - let flags_raw = data[2 + pkg_length_len + idx_name.len + data_name.len]; - let mut flags = FieldFlags { - access_type: match flags_raw & 0x0F { - 0 => AccessType::AnyAcc, - 1 => AccessType::ByteAcc, - 2 => AccessType::WordAcc, - 3 => AccessType::DWordAcc, - 4 => AccessType::QWordAcc, - 5 => AccessType::BufferAcc(AccessAttrib::AttribByte), - _ => return Err(AmlError::AmlParseError("IndexField - Invalid access type")) - }, - lock_rule: (flags_raw & 0x10) == 0x10, - update_rule: match (flags_raw & 0x60) >> 5 { - 0 => UpdateRule::Preserve, - 1 => UpdateRule::WriteAsOnes, - 2 => UpdateRule::WriteAsZeros, - _ => return Err(AmlError::AmlParseError("IndexField - Invalid update rule")) - } - }; - - let selector = FieldSelector::Index { - index_selector: idx_name.val.get_as_string()?, - data_selector: data_name.val.get_as_string()? - }; - - parse_field_list(&data[3 + pkg_length_len + idx_name.len + data_name.len .. 2 + pkg_length], - ctx, selector, &mut flags)?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + pkg_length - }) -} - -fn parse_field_list(data: &[u8], - ctx: &mut AmlExecutionContext, - selector: FieldSelector, - flags: &mut FieldFlags) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - let mut current_offset: usize = 0; - let mut field_offset: usize = 0; - let mut connection = AmlValue::Uninitialized; - - while current_offset < data.len() { - let res = parse_field_element(&data[current_offset..], ctx, selector.clone(), &mut connection, flags, &mut field_offset)?; - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - current_offset += res.len; - } - - Ok(AmlParseType { - val: AmlValue::None, - len: data.len() - }) -} - -fn parse_field_element(data: &[u8], - ctx: &mut AmlExecutionContext, - selector: FieldSelector, - connection: &mut AmlValue, - flags: &mut FieldFlags, - offset: &mut usize) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - let length = if let Ok(field) = parse_named_field(data, ctx) { - let local_scope_string = get_namespace_string(ctx.scope.clone(), AmlValue::String(field.val.name.clone()))?; - - ctx.add_to_namespace(local_scope_string, AmlValue::FieldUnit(FieldUnit { - selector: selector.clone(), - connection: Box::new(connection.clone()), - flags: flags.clone(), - offset: offset.clone(), - length: field.val.length - }))?; - - *offset += field.val.length; - field.len - } else if let Ok(field) = parse_reserved_field(data, ctx) { - *offset += field.val; - field.len - } else if let Ok(field) = parse_access_field(data, ctx) { - match field.val.access_type { - AccessType::BufferAcc(_) => - flags.access_type = AccessType::BufferAcc(field.val.access_attrib.clone()), - ref a => flags.access_type = a.clone() - } - - field.len - } else if let Ok(field) = parse_connect_field(data, ctx) { - *connection = field.val.clone(); - field.len - } else { - return Err(AmlError::AmlInvalidOpCode); - }; - - Ok(AmlParseType { - val: AmlValue::None, - len: length - }) -} - -fn parse_named_field(data: &[u8], _ctx: &mut AmlExecutionContext) -> Result, AmlError> { - if data.len() < 4 { - return Err(AmlError::AmlParseError("NamedField - data truncated")) - } - - let (name_seg, name_seg_len) = parse_name_seg(&data[0..4])?; - let name = match String::from_utf8(name_seg) { - Ok(s) => s, - Err(_) 
=> return Err(AmlError::AmlParseError("NamedField - invalid name")) - }; - let (length, length_len) = parse_pkg_length(&data[4..])?; - - Ok(AmlParseTypeGeneric { - val: NamedField { name, length }, - len: name_seg_len + length_len - }) -} - -fn parse_reserved_field(data: &[u8], _ctx: &mut AmlExecutionContext) -> Result, AmlError> { - if data.len() < 1 { - return Err(AmlError::AmlParseError("ReservedField - data truncated")) - } - - parser_opcode!(data, 0x00); - - let (length, length_len) = parse_pkg_length(&data[1..])?; - Ok(AmlParseTypeGeneric { - val: length, - len: 1 + length_len - }) -} - -fn parse_access_field(data: &[u8], _ctx: &mut AmlExecutionContext) -> Result, AmlError> { - if data.len() < 3 { - return Err(AmlError::AmlParseError("AccessField - data truncated")) - } - - parser_opcode!(data, 0x01, 0x03); - - let flags_raw = data[1]; - let access_type = match flags_raw & 0x0F { - 0 => AccessType::AnyAcc, - 1 => AccessType::ByteAcc, - 2 => AccessType::WordAcc, - 3 => AccessType::DWordAcc, - 4 => AccessType::QWordAcc, - 5 => AccessType::BufferAcc(AccessAttrib::AttribByte), - _ => return Err(AmlError::AmlParseError("AccessField - Invalid access type")) - }; - - let access_attrib = match (flags_raw & 0xC0) >> 6 { - 0 => match data[2] { - 0x02 => AccessAttrib::AttribQuick, - 0x04 => AccessAttrib::AttribSendReceive, - 0x06 => AccessAttrib::AttribByte, - 0x08 => AccessAttrib::AttribWord, - 0x0A => AccessAttrib::AttribBlock, - 0x0B => AccessAttrib::AttribBytes(data[3]), - 0x0C => AccessAttrib::AttribProcessCall, - 0x0D => AccessAttrib::AttribBlockProcessCall, - 0x0E => AccessAttrib::AttribRawBytes(data[3]), - 0x0F => AccessAttrib::AttribRawProcessBytes(data[3]), - _ => return Err(AmlError::AmlParseError("AccessField - Invalid access attrib")) - }, - 1 => AccessAttrib::AttribBytes(data[2]), - 2 => AccessAttrib::AttribRawBytes(data[2]), - 3 => AccessAttrib::AttribRawProcessBytes(data[2]), - _ => return Err(AmlError::AmlParseError("AccessField - Invalid access attrib")) - // This should never happen but the compiler bitches if I don't cover this - }; - - Ok(AmlParseTypeGeneric { - val: AccessField { access_type, access_attrib }, - len: if data[0] == 0x01 { - 3 as usize - } else { - 4 as usize - } - }) -} - -fn parse_connect_field(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 1 { - return Err(AmlError::AmlParseError("ConnectField - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x02); - - if let Ok(e) = parse_def_buffer(&data[1..], ctx) { - Ok(AmlParseType { - val: e.val, - len: e.len + 1 - }) - } else { - let name = parse_name_string(&data[1..], ctx)?; - Ok(AmlParseType { - val: AmlValue::Alias(name.val.get_as_string()?), - len: name.len + 1 - }) - } -} - -fn parse_def_method(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 1 { - return Err(AmlError::AmlParseError("DefMethod - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x14); - - let (pkg_len, pkg_len_len) = parse_pkg_length(&data[1..])?; - let name = parse_name_string(&data[1 + pkg_len_len..], ctx)?; - let flags = data[1 + pkg_len_len + name.len]; - - let arg_count = flags & 0x07; - let serialized = (flags & 0x08) == 0x08; - let sync_level = flags & 0xF0 >> 4; - - let term_list = &data[2 + pkg_len_len + name.len .. 
1 + pkg_len]; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - ctx.add_to_namespace(local_scope_string, AmlValue::Method(Method { - arg_count, - serialized, - sync_level, - term_list: term_list.to_vec() - }))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 1 + pkg_len - }) -} - -fn parse_def_mutex(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 1 { - return Err(AmlError::AmlParseError("DefMutex - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x01); - - let name = parse_name_string(&data[2 ..], ctx)?; - let flags = data[2 + name.len]; - let sync_level = flags & 0x0F; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - ctx.add_to_namespace(local_scope_string, AmlValue::Mutex((sync_level, None)))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 3 + name.len - }) -} - -fn parse_def_power_res(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 5 { - return Err(AmlError::AmlParseError("DefPowerRes - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - // TODO: How to handle local context deferreds - parser_opcode_extended!(data, 0x84); - - let (pkg_len, pkg_len_len) = parse_pkg_length(&data[2..])?; - let name = parse_name_string(&data[2 + pkg_len_len..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - - let system_level = data[2 + pkg_len_len + name.len]; - let resource_order: u16 = (data[3 + pkg_len_len + name.len] as u16) + - ((data[4 + pkg_len_len + name.len] as u16) << 8); - - let mut local_ctx = AmlExecutionContext::new(local_scope_string.clone()); - parse_object_list(&data[5 + pkg_len_len + name.len .. 2 + pkg_len], &mut local_ctx)?; - - ctx.add_to_namespace(local_scope_string, AmlValue::PowerResource(PowerResource { - system_level, - resource_order, - obj_list: local_ctx.namespace_delta.clone() - }))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + pkg_len - }) -} - -fn parse_def_processor(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 8 { - return Err(AmlError::AmlParseError("DefProcessor - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x83); - - let (pkg_len, pkg_len_len) = parse_pkg_length(&data[2..])?; - let name = parse_name_string(&data[2 + pkg_len_len..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - - let proc_id = data[2 + pkg_len_len + name.len]; - let p_blk_addr: u32 = (data[3 + pkg_len_len + name.len] as u32) + - ((data[4 + pkg_len_len + name.len] as u32) << 8) + - ((data[5 + pkg_len_len + name.len] as u32) << 16) + - ((data[6 + pkg_len_len + name.len] as u32) << 24); - let p_blk_len = data[7 + pkg_len_len + name.len]; - - let mut local_ctx = AmlExecutionContext::new(local_scope_string.clone()); - parse_object_list(&data[8 + pkg_len_len + name.len .. 
2 + pkg_len], &mut local_ctx)?; - - ctx.add_to_namespace(local_scope_string, AmlValue::Processor(Processor { - proc_id: proc_id, - p_blk: if p_blk_len > 0 { Some(p_blk_addr) } else { None }, - obj_list: local_ctx.namespace_delta.clone(), - notify_methods: BTreeMap::new() - }))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + pkg_len - }) -} - -fn parse_def_thermal_zone(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 2 { - return Err(AmlError::AmlParseError("DefThermalZone - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x85); - - let (pkg_len, pkg_len_len) = parse_pkg_length(&data[2..])?; - let name = parse_name_string(&data[2 + pkg_len_len .. 2 + pkg_len], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - - let mut local_ctx = AmlExecutionContext::new(local_scope_string.clone()); - parse_object_list(&data[2 + pkg_len_len + name.len .. 2 + pkg_len], &mut local_ctx)?; - - ctx.add_to_namespace(local_scope_string, AmlValue::ThermalZone(ThermalZone { - obj_list: local_ctx.namespace_delta.clone(), - notify_methods: BTreeMap::new() - }))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + pkg_len - }) -} - -fn parse_def_external(data: &[u8], ctx: &mut AmlExecutionContext) -> ParseResult { - if data.len() < 2 { - return Err(AmlError::AmlParseError("DefExternal - data truncated")) - } - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x15); - - let object_name = parse_name_string(&data[1..], ctx)?; - let object_type = data[1 + object_name.len]; - let argument_count = data[2 + object_name.len]; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), object_name.val)?; - - let obj = match object_type { - 8 => AmlValue::Method(Method { - arg_count: argument_count, - serialized: false, - sync_level: 0, - term_list: vec!() - }), - _ => AmlValue::Uninitialized - }; - - ctx.add_to_namespace(local_scope_string, obj)?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 3 + object_name.len - }) -} diff --git a/src/acpi/aml/namespace.rs b/src/acpi/aml/namespace.rs deleted file mode 100644 index 906d3f63..00000000 --- a/src/acpi/aml/namespace.rs +++ /dev/null @@ -1,493 +0,0 @@ -use alloc::boxed::Box; -use alloc::string::String; -use alloc::string::ToString; -use alloc::vec::Vec; -use alloc::btree_map::BTreeMap; - -use core::fmt::{Debug, Formatter, Error}; -use core::str::FromStr; - -use super::termlist::parse_term_list; -use super::namedobj::{ RegionSpace, FieldFlags }; -use super::parser::{AmlExecutionContext, ExecutionState}; -use super::AmlError; - -use acpi::{SdtSignature, get_signature_from_index, get_index_from_signature}; - -#[derive(Clone)] -pub enum FieldSelector { - Region(String), - Bank { - region: String, - bank_register: String, - bank_selector: Box - }, - Index { - index_selector: String, - data_selector: String - } -} - -#[derive(Clone)] -pub enum ObjectReference { - ArgObj(u8), - LocalObj(u8), - Object(String), - Index(Box, Box) -} - -#[derive(Clone)] -pub struct Method { - pub arg_count: u8, - pub serialized: bool, - pub sync_level: u8, - pub term_list: Vec -} - -#[derive(Clone)] -pub struct BufferField { - pub source_buf: Box, - pub index: Box, - pub length: Box -} - -#[derive(Clone)] -pub struct FieldUnit { - pub selector: FieldSelector, - 
pub connection: Box, - pub flags: FieldFlags, - pub offset: usize, - pub length: usize -} - -#[derive(Clone)] -pub struct Device { - pub obj_list: Vec, - pub notify_methods: BTreeMap> -} - -#[derive(Clone)] -pub struct ThermalZone { - pub obj_list: Vec, - pub notify_methods: BTreeMap> -} - -#[derive(Clone)] -pub struct Processor { - pub proc_id: u8, - pub p_blk: Option, - pub obj_list: Vec, - pub notify_methods: BTreeMap> -} - -#[derive(Clone)] -pub struct OperationRegion { - pub region: RegionSpace, - pub offset: Box, - pub len: Box, - pub accessor: Accessor, - pub accessed_by: Option -} - -#[derive(Clone)] -pub struct PowerResource { - pub system_level: u8, - pub resource_order: u16, - pub obj_list: Vec -} - -pub struct Accessor { - pub read: fn(usize) -> u64, - pub write: fn(usize, u64) -} - -impl Clone for Accessor { - fn clone(&self) -> Accessor { - Accessor { - read: (*self).read, - write: (*self).write - } - } -} - -#[derive(Clone)] -pub enum AmlValue { - None, - Uninitialized, - Alias(String), - Buffer(Vec), - BufferField(BufferField), - DDBHandle((Vec, SdtSignature)), - DebugObject, - Device(Device), - Event(u64), - FieldUnit(FieldUnit), - Integer(u64), - IntegerConstant(u64), - Method(Method), - Mutex((u8, Option)), - ObjectReference(ObjectReference), - OperationRegion(OperationRegion), - Package(Vec), - String(String), - PowerResource(PowerResource), - Processor(Processor), - RawDataBuffer(Vec), - ThermalZone(ThermalZone) -} - -impl Debug for AmlValue { - fn fmt(&self, _f: &mut Formatter) -> Result<(), Error> { Ok(()) } -} - -impl AmlValue { - pub fn get_type_string(&self) -> String { - match *self { - AmlValue::Uninitialized => String::from_str("[Uninitialized Object]").unwrap(), - AmlValue::Integer(_) => String::from_str("[Integer]").unwrap(), - AmlValue::String(_) => String::from_str("[String]").unwrap(), - AmlValue::Buffer(_) => String::from_str("[Buffer]").unwrap(), - AmlValue::Package(_) => String::from_str("[Package]").unwrap(), - AmlValue::FieldUnit(_) => String::from_str("[Field]").unwrap(), - AmlValue::Device(_) => String::from_str("[Device]").unwrap(), - AmlValue::Event(_) => String::from_str("[Event]").unwrap(), - AmlValue::Method(_) => String::from_str("[Control Method]").unwrap(), - AmlValue::Mutex(_) => String::from_str("[Mutex]").unwrap(), - AmlValue::OperationRegion(_) => String::from_str("[Operation Region]").unwrap(), - AmlValue::PowerResource(_) => String::from_str("[Power Resource]").unwrap(), - AmlValue::Processor(_) => String::from_str("[Processor]").unwrap(), - AmlValue::ThermalZone(_) => String::from_str("[Thermal Zone]").unwrap(), - AmlValue::BufferField(_) => String::from_str("[Buffer Field]").unwrap(), - AmlValue::DDBHandle(_) => String::from_str("[DDB Handle]").unwrap(), - AmlValue::DebugObject => String::from_str("[Debug Object]").unwrap(), - _ => String::new() - } - } - - pub fn get_as_type(&self, t: AmlValue) -> Result { - match t { - AmlValue::None => Ok(AmlValue::None), - AmlValue::Uninitialized => Ok(self.clone()), - AmlValue::Alias(_) => match *self { - AmlValue::Alias(_) => Ok(self.clone()), - _ => Err(AmlError::AmlValueError) - }, - AmlValue::Buffer(_) => Ok(AmlValue::Buffer(self.get_as_buffer()?)), - AmlValue::BufferField(_) => Ok(AmlValue::BufferField(self.get_as_buffer_field()?)), - AmlValue::DDBHandle(_) => Ok(AmlValue::DDBHandle(self.get_as_ddb_handle()?)), - AmlValue::DebugObject => match *self { - AmlValue::DebugObject => Ok(self.clone()), - _ => Err(AmlError::AmlValueError) - }, - AmlValue::Device(_) => 
Ok(AmlValue::Device(self.get_as_device()?)), - AmlValue::Event(_) => Ok(AmlValue::Event(self.get_as_event()?)), - AmlValue::FieldUnit(_) => Ok(AmlValue::FieldUnit(self.get_as_field_unit()?)), - AmlValue::Integer(_) => Ok(AmlValue::Integer(self.get_as_integer()?)), - AmlValue::IntegerConstant(_) => Ok(AmlValue::IntegerConstant(self.get_as_integer_constant()?)), - AmlValue::Method(_) => Ok(AmlValue::Method(self.get_as_method()?)), - AmlValue::Mutex(_) => Ok(AmlValue::Mutex(self.get_as_mutex()?)), - AmlValue::ObjectReference(_) => Ok(AmlValue::ObjectReference(self.get_as_object_reference()?)), - AmlValue::OperationRegion(_) => match *self { - AmlValue::OperationRegion(_) => Ok(self.clone()), - _ => Err(AmlError::AmlValueError) - }, - AmlValue::Package(_) => Ok(AmlValue::Package(self.get_as_package()?)), - AmlValue::String(_) => Ok(AmlValue::String(self.get_as_string()?)), - AmlValue::PowerResource(_) => Ok(AmlValue::PowerResource(self.get_as_power_resource()?)), - AmlValue::Processor(_) => Ok(AmlValue::Processor(self.get_as_processor()?)), - AmlValue::RawDataBuffer(_) => Ok(AmlValue::RawDataBuffer(self.get_as_raw_data_buffer()?)), - AmlValue::ThermalZone(_) => Ok(AmlValue::ThermalZone(self.get_as_thermal_zone()?)) - } - } - - pub fn get_as_buffer(&self) -> Result, AmlError> { - match *self { - AmlValue::Buffer(ref b) => Ok(b.clone()), - AmlValue::Integer(ref i) => { - let mut v: Vec = vec!(); - let mut i = i.clone(); - - while i != 0 { - v.push((i & 0xFF) as u8); - i >>= 8; - } - - while v.len() < 8 { - v.push(0); - } - - Ok(v) - }, - AmlValue::String(ref s) => { - Ok(s.clone().into_bytes()) - }, - AmlValue::BufferField(ref b) => { - let buf = b.source_buf.get_as_buffer()?; - let idx = b.index.get_as_integer()? as usize; - let len = b.length.get_as_integer()? as usize; - - if idx + len > buf.len() { - return Err(AmlError::AmlValueError); - } - - Ok(buf[idx .. 
idx + len].to_vec()) - }, - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_buffer_field(&self) -> Result { - match *self { - AmlValue::BufferField(ref b) => Ok(b.clone()), - _ => { - let raw_buf = self.get_as_buffer()?; - let buf = Box::new(AmlValue::Buffer(raw_buf.clone())); - let idx = Box::new(AmlValue::IntegerConstant(0)); - let len = Box::new(AmlValue::Integer(raw_buf.len() as u64)); - - Ok(BufferField { - source_buf: buf, - index: idx, - length: len - }) - } - } - } - - pub fn get_as_ddb_handle(&self) -> Result<(Vec, SdtSignature), AmlError> { - match *self { - AmlValue::DDBHandle(ref v) => Ok(v.clone()), - AmlValue::Integer(i) => if let Some(sig) = get_signature_from_index(i as usize) { - Ok((vec!(), sig)) - } else { - Err(AmlError::AmlValueError) - }, - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_device(&self) -> Result { - match *self { - AmlValue::Device(ref s) => Ok(s.clone()), - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_event(&self) -> Result { - match *self { - AmlValue::Event(ref e) => Ok(e.clone()), - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_field_unit(&self) -> Result { - match *self { - AmlValue::FieldUnit(ref e) => Ok(e.clone()), - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_integer(&self) -> Result { - match *self { - AmlValue::IntegerConstant(ref i) => Ok(i.clone()), - AmlValue::Integer(ref i) => Ok(i.clone()), - AmlValue::Buffer(ref b) => { - let mut b = b.clone(); - if b.len() > 8 { - return Err(AmlError::AmlValueError); - } - - let mut i: u64 = 0; - - while b.len() > 0 { - i <<= 8; - i += b.pop().expect("Won't happen") as u64; - } - - Ok(i) - }, - AmlValue::BufferField(_) => { - let mut b = self.get_as_buffer()?; - if b.len() > 8 { - return Err(AmlError::AmlValueError); - } - - let mut i: u64 = 0; - - while b.len() > 0 { - i <<= 8; - i += b.pop().expect("Won't happen") as u64; - } - - Ok(i) - }, - AmlValue::DDBHandle(ref v) => if let Some(idx) = get_index_from_signature(v.1.clone()) { - Ok(idx as u64) - } else { - Err(AmlError::AmlValueError) - }, - AmlValue::String(ref s) => { - let s = s.clone()[0..8].to_string().to_uppercase(); - let mut i: u64 = 0; - - for c in s.chars() { - if !c.is_digit(16) { - break; - } - - i <<= 8; - i += c.to_digit(16).unwrap() as u64; - } - - Ok(i) - }, - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_integer_constant(&self) -> Result { - match *self { - AmlValue::IntegerConstant(ref i) => Ok(i.clone()), - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_method(&self) -> Result { - match *self { - AmlValue::Method(ref m) => Ok(m.clone()), - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_mutex(&self) -> Result<(u8, Option), AmlError> { - match *self { - AmlValue::Mutex(ref m) => Ok(m.clone()), - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_object_reference(&self) -> Result { - match *self { - AmlValue::ObjectReference(ref m) => Ok(m.clone()), - _ => Err(AmlError::AmlValueError) - } - } - - /* - pub fn get_as_operation_region(&self) -> Result { - match *self { - AmlValue::OperationRegion(ref p) => Ok(p.clone()), - _ => Err(AmlError::AmlValueError) - } - } - */ - - pub fn get_as_package(&self) -> Result, AmlError> { - match *self { - AmlValue::Package(ref p) => Ok(p.clone()), - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_string(&self) -> Result { - match *self { - AmlValue::String(ref s) => Ok(s.clone()), - AmlValue::Integer(ref i) => Ok(format!("{:X}", i)), - AmlValue::IntegerConstant(ref i) 
=> Ok(format!("{:X}", i)), - AmlValue::Buffer(ref b) => Ok(String::from_utf8(b.clone()).expect("Invalid UTF-8")), - AmlValue::BufferField(_) => { - let b = self.get_as_buffer()?; - Ok(String::from_utf8(b).expect("Invalid UTF-8")) - }, - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_power_resource(&self) -> Result { - match *self { - AmlValue::PowerResource(ref p) => Ok(p.clone()), - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_processor(&self) -> Result { - match *self { - AmlValue::Processor(ref p) => Ok(p.clone()), - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_raw_data_buffer(&self) -> Result, AmlError> { - match *self { - AmlValue::RawDataBuffer(ref p) => Ok(p.clone()), - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_as_thermal_zone(&self) -> Result { - match *self { - AmlValue::ThermalZone(ref p) => Ok(p.clone()), - _ => Err(AmlError::AmlValueError) - } - } -} - -impl Method { - pub fn execute(&self, scope: String, parameters: Vec) -> AmlValue { - let mut ctx = AmlExecutionContext::new(scope); - ctx.init_arg_vars(parameters); - - let _ = parse_term_list(&self.term_list[..], &mut ctx); - ctx.clean_namespace(); - - match ctx.state { - ExecutionState::RETURN(v) => v, - _ => AmlValue::IntegerConstant(0) - } - } -} - -pub fn get_namespace_string(current: String, modifier_v: AmlValue) -> Result { - let mut modifier = modifier_v.get_as_string()?; - - if current.len() == 0 { - return Ok(modifier); - } - - if modifier.len() == 0 { - return Ok(current); - } - - if modifier.starts_with("\\") { - return Ok(modifier); - } - - let mut namespace = current.clone(); - - if modifier.starts_with("^") { - while modifier.starts_with("^") { - modifier = modifier[1..].to_string(); - - if namespace.ends_with("\\") { - return Err(AmlError::AmlValueError); - } - - loop { - if namespace.ends_with(".") { - namespace.pop(); - break; - } - - if namespace.pop() == None { - return Err(AmlError::AmlValueError); - } - } - } - } - - if !namespace.ends_with("\\") { - namespace.push('.'); - } - - Ok(namespace + &modifier) -} diff --git a/src/acpi/aml/namespacemodifier.rs b/src/acpi/aml/namespacemodifier.rs deleted file mode 100644 index 77fa1406..00000000 --- a/src/acpi/aml/namespacemodifier.rs +++ /dev/null @@ -1,106 +0,0 @@ -use super::AmlError; -use super::parser::{AmlParseType, ParseResult, AmlExecutionContext, ExecutionState}; -use super::namespace::{AmlValue, get_namespace_string}; -use super::pkglength::parse_pkg_length; -use super::namestring::parse_name_string; -use super::termlist::parse_term_list; -use super::dataobj::parse_data_ref_obj; - -pub fn parse_namespace_modifier(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_selector! 
{ - data, ctx, - parse_alias_op, - parse_scope_op, - parse_name_op - }; - - Err(AmlError::AmlInvalidOpCode) -} - -fn parse_alias_op(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x06); - - let source_name = parse_name_string(&data[1..], ctx)?; - let alias_name = parse_name_string(&data[1 + source_name.len..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), source_name.val)?; - let local_alias_string = get_namespace_string(ctx.scope.clone(), alias_name.val)?; - - ctx.add_to_namespace(local_scope_string, AmlValue::Alias(local_alias_string))?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 1 + source_name.len + alias_name.len - }) -} - -fn parse_name_op(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x08); - - let name = parse_name_string(&data[1..], ctx)?; - let data_ref_obj = parse_data_ref_obj(&data[1 + name.len..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val)?; - - ctx.add_to_namespace(local_scope_string, data_ref_obj.val)?; - - Ok(AmlParseType { - val: AmlValue::None, - len: 1 + name.len + data_ref_obj.len - }) -} - -fn parse_scope_op(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x10); - - let (pkg_length, pkg_length_len) = parse_pkg_length(&data[1..])?; - let name = parse_name_string(&data[1 + pkg_length_len..], ctx)?; - - let local_scope_string = get_namespace_string(ctx.scope.clone(), name.val.clone())?; - let containing_scope_string = ctx.scope.clone(); - - ctx.scope = local_scope_string; - parse_term_list(&data[1 + pkg_length_len + name.len .. 1 + pkg_length], ctx)?; - ctx.scope = containing_scope_string; - - Ok(AmlParseType { - val: AmlValue::None, - len: 1 + pkg_length - }) -} diff --git a/src/acpi/aml/namestring.rs b/src/acpi/aml/namestring.rs deleted file mode 100644 index ecc52daa..00000000 --- a/src/acpi/aml/namestring.rs +++ /dev/null @@ -1,226 +0,0 @@ -use alloc::vec::Vec; -use alloc::string::String; - -use super::AmlError; -use super::parser::{AmlParseType, ParseResult, AmlExecutionContext, ExecutionState}; -use super::namespace::AmlValue; -use super::dataobj::{parse_arg_obj, parse_local_obj}; -use super::type2opcode::parse_type6_opcode; - -pub fn parse_name_string(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - let mut characters: Vec = vec!(); - let mut starting_index: usize = 0; - - if data[0] == 0x5C { - characters.push(data[0]); - starting_index = 1; - } else if data[0] == 0x5E { - while data[starting_index] == 0x5E { - characters.push(data[starting_index]); - starting_index += 1; - } - } - - let sel = |data| { - parser_selector_simple! 
{ - data, - parse_dual_name_path, - parse_multi_name_path, - parse_null_name, - parse_name_seg - }; - - Err(AmlError::AmlInvalidOpCode) - }; - let (mut chr, len) = sel(&data[starting_index..])?; - characters.append(&mut chr); - - let name_string = String::from_utf8(characters); - - match name_string { - Ok(s) => Ok(AmlParseType { - val: AmlValue::String(s.clone()), - len: len + starting_index - }), - Err(_) => Err(AmlError::AmlParseError("Namestring - Name is invalid")) - } -} - -fn parse_null_name(data: &[u8]) -> Result<(Vec, usize), AmlError> { - parser_opcode!(data, 0x00); - Ok((vec!(), 1 )) -} - -pub fn parse_name_seg(data: &[u8]) -> Result<(Vec, usize), AmlError> { - match data[0] { - 0x41 ... 0x5A | 0x5F => (), - _ => return Err(AmlError::AmlInvalidOpCode) - } - - match data[1] { - 0x30 ... 0x39 | 0x41 ... 0x5A | 0x5F => (), - _ => return Err(AmlError::AmlInvalidOpCode) - } - - match data[2] { - 0x30 ... 0x39 | 0x41 ... 0x5A | 0x5F => (), - _ => return Err(AmlError::AmlInvalidOpCode) - } - - match data[3] { - 0x30 ... 0x39 | 0x41 ... 0x5A | 0x5F => (), - _ => return Err(AmlError::AmlInvalidOpCode) - } - - let mut name_seg = vec!(data[0], data[1], data[2], data[3]); - while *(name_seg.last().unwrap()) == 0x5F { - name_seg.pop(); - } - - Ok((name_seg, 4)) -} - -fn parse_dual_name_path(data: &[u8]) -> Result<(Vec, usize), AmlError> { - parser_opcode!(data, 0x2E); - - let mut characters: Vec = vec!(); - let mut dual_len: usize = 1; - - match parse_name_seg(&data[1..5]) { - Ok((mut v, len)) => { - characters.append(&mut v); - dual_len += len; - }, - Err(e) => return Err(e) - } - - characters.push(0x2E); - - match parse_name_seg(&data[5..9]) { - Ok((mut v, len)) => { - characters.append(&mut v); - dual_len += len; - }, - Err(e) => return Err(e) - } - - Ok((characters, dual_len)) -} - -fn parse_multi_name_path(data: &[u8]) -> Result<(Vec, usize), AmlError> { - parser_opcode!(data, 0x2F); - - let seg_count = data[1]; - if seg_count == 0x00 { - return Err(AmlError::AmlParseError("MultiName Path - can't have zero name segments")); - } - - let mut current_seg = 0; - let mut characters: Vec = vec!(); - let mut multi_len: usize = 2; - - while current_seg < seg_count { - match parse_name_seg(&data[(current_seg as usize * 4) + 2 ..]) { - Ok((mut v, len)) => { - characters.append(&mut v); - multi_len += len; - }, - Err(e) => return Err(e) - } - - characters.push(0x2E); - - current_seg += 1; - } - - characters.pop(); - - Ok((characters, multi_len)) -} - -pub fn parse_super_name(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_selector! { - data, ctx, - parse_simple_name, - parse_type6_opcode, - parse_debug_obj - }; - - Err(AmlError::AmlInvalidOpCode) -} - -fn parse_debug_obj(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x31); - - Ok(AmlParseType { - val: AmlValue::DebugObject, - len: 2 - }) -} - -pub fn parse_simple_name(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_selector! 
{ - data, ctx, - parse_name_string, - parse_arg_obj, - parse_local_obj - }; - - Err(AmlError::AmlInvalidOpCode) -} - -pub fn parse_target(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - if data[0] == 0x00 { - Ok(AmlParseType { - val: AmlValue::None, - len: 1 - }) - } else { - parse_super_name(data, ctx) - } -} diff --git a/src/acpi/aml/parser.rs b/src/acpi/aml/parser.rs deleted file mode 100644 index c4a8f922..00000000 --- a/src/acpi/aml/parser.rs +++ /dev/null @@ -1,552 +0,0 @@ -use alloc::string::String; -use alloc::btree_map::BTreeMap; -use alloc::vec::Vec; -use alloc::boxed::Box; - -use spin::RwLockWriteGuard; - -use super::namespace::{ AmlValue, ObjectReference }; -use super::AmlError; - -use acpi::ACPI_TABLE; - -pub type ParseResult = Result; -pub type AmlParseType = AmlParseTypeGeneric; - -pub struct AmlParseTypeGeneric { - pub val: T, - pub len: usize -} - -pub enum ExecutionState { - EXECUTING, - CONTINUE, - BREAK, - RETURN(AmlValue) -} - -pub struct AmlExecutionContext { - pub scope: String, - pub local_vars: [AmlValue; 8], - pub arg_vars: [AmlValue; 8], - pub state: ExecutionState, - pub namespace_delta: Vec, - pub ctx_id: u64, - pub sync_level: u8 -} - -impl AmlExecutionContext { - pub fn new(scope: String) -> AmlExecutionContext { - let mut idptr = ACPI_TABLE.next_ctx.write(); - let id: u64 = *idptr; - - *idptr += 1; - - AmlExecutionContext { - scope: scope, - local_vars: [AmlValue::Uninitialized, - AmlValue::Uninitialized, - AmlValue::Uninitialized, - AmlValue::Uninitialized, - AmlValue::Uninitialized, - AmlValue::Uninitialized, - AmlValue::Uninitialized, - AmlValue::Uninitialized], - arg_vars: [AmlValue::Uninitialized, - AmlValue::Uninitialized, - AmlValue::Uninitialized, - AmlValue::Uninitialized, - AmlValue::Uninitialized, - AmlValue::Uninitialized, - AmlValue::Uninitialized, - AmlValue::Uninitialized], - state: ExecutionState::EXECUTING, - namespace_delta: vec!(), - ctx_id: id, - sync_level: 0 - } - } - - pub fn wait_for_event(&mut self, event_ptr: AmlValue) -> Result { - let mut namespace_ptr = self.prelock(); - let namespace = match *namespace_ptr { - Some(ref mut n) => n, - None => return Err(AmlError::AmlHardFatal) - }; - - let mutex_idx = match event_ptr { - AmlValue::String(ref s) => s.clone(), - AmlValue::ObjectReference(ref o) => match *o { - ObjectReference::Object(ref s) => s.clone(), - _ => return Err(AmlError::AmlValueError) - }, - _ => return Err(AmlError::AmlValueError) - }; - - let mutex = match namespace.get(&mutex_idx) { - Some(s) => s.clone(), - None => return Err(AmlError::AmlValueError) - }; - - match mutex { - AmlValue::Event(count) => { - if count > 0 { - namespace.insert(mutex_idx, AmlValue::Event(count - 1)); - return Ok(true); - } - }, - _ => return Err(AmlError::AmlValueError) - } - - Ok(false) - } - - pub fn signal_event(&mut self, event_ptr: AmlValue) -> Result<(), AmlError> { - let mut namespace_ptr = self.prelock(); - let namespace = match *namespace_ptr { - Some(ref mut n) => n, - None => return Err(AmlError::AmlHardFatal) - }; - - - let mutex_idx = match event_ptr { - AmlValue::String(ref s) => s.clone(), - AmlValue::ObjectReference(ref o) => match *o { - ObjectReference::Object(ref s) => s.clone(), - _ => return Err(AmlError::AmlValueError) - }, - _ => return Err(AmlError::AmlValueError) - }; - - let mutex = match namespace.get(&mutex_idx) { - Some(s) => s.clone(), - None => return 
Err(AmlError::AmlValueError) - }; - - match mutex { - AmlValue::Event(count) => { - namespace.insert(mutex_idx, AmlValue::Event(count + 1)); - }, - _ => return Err(AmlError::AmlValueError) - } - - Ok(()) - } - - pub fn release_mutex(&mut self, mutex_ptr: AmlValue) -> Result<(), AmlError> { - let id = self.ctx_id; - - let mut namespace_ptr = self.prelock(); - let namespace = match *namespace_ptr { - Some(ref mut n) => n, - None => return Err(AmlError::AmlHardFatal) - }; - - let mutex_idx = match mutex_ptr { - AmlValue::String(ref s) => s.clone(), - AmlValue::ObjectReference(ref o) => match *o { - ObjectReference::Object(ref s) => s.clone(), - _ => return Err(AmlError::AmlValueError) - }, - _ => return Err(AmlError::AmlValueError) - }; - - let mutex = match namespace.get(&mutex_idx) { - Some(s) => s.clone(), - None => return Err(AmlError::AmlValueError) - }; - - match mutex { - AmlValue::Mutex((sync_level, owner)) => { - if let Some(o) = owner { - if o == id { - if sync_level == self.sync_level { - namespace.insert(mutex_idx, AmlValue::Mutex((sync_level, None))); - return Ok(()); - } else { - return Err(AmlError::AmlValueError); - } - } else { - return Err(AmlError::AmlHardFatal); - } - } - }, - AmlValue::OperationRegion(ref region) => { - if let Some(o) = region.accessed_by { - if o == id { - let mut new_region = region.clone(); - new_region.accessed_by = None; - - namespace.insert(mutex_idx, AmlValue::OperationRegion(new_region)); - return Ok(()); - } else { - return Err(AmlError::AmlHardFatal); - } - } - }, - _ => return Err(AmlError::AmlValueError) - } - - Ok(()) - } - - pub fn acquire_mutex(&mut self, mutex_ptr: AmlValue) -> Result { - let id = self.ctx_id; - - let mut namespace_ptr = self.prelock(); - let namespace = match *namespace_ptr { - Some(ref mut n) => n, - None => return Err(AmlError::AmlHardFatal) - }; - let mutex_idx = match mutex_ptr { - AmlValue::String(ref s) => s.clone(), - AmlValue::ObjectReference(ref o) => match *o { - ObjectReference::Object(ref s) => s.clone(), - _ => return Err(AmlError::AmlValueError) - }, - _ => return Err(AmlError::AmlValueError) - }; - - let mutex = match namespace.get(&mutex_idx) { - Some(s) => s.clone(), - None => return Err(AmlError::AmlValueError) - }; - - match mutex { - AmlValue::Mutex((sync_level, owner)) => { - if owner == None { - if sync_level < self.sync_level { - return Err(AmlError::AmlValueError); - } - - namespace.insert(mutex_idx, AmlValue::Mutex((sync_level, Some(id)))); - self.sync_level = sync_level; - - return Ok(true); - } - }, - AmlValue::OperationRegion(ref o) => { - if o.accessed_by == None { - let mut new_region = o.clone(); - new_region.accessed_by = Some(id); - - namespace.insert(mutex_idx, AmlValue::OperationRegion(new_region)); - return Ok(true); - } - }, - _ => return Err(AmlError::AmlValueError) - } - - Ok(false) - } - - pub fn add_to_namespace(&mut self, name: String, value: AmlValue) -> Result<(), AmlError> { - let mut namespace = ACPI_TABLE.namespace.write(); - - if let Some(ref mut namespace) = *namespace { - if let Some(obj) = namespace.get(&name) { - match *obj { - AmlValue::Uninitialized => (), - AmlValue::Method(ref m) => { - if m.term_list.len() != 0 { - return Err(AmlError::AmlValueError); - } - }, - _ => return Err(AmlError::AmlValueError) - } - } - - self.namespace_delta.push(name.clone()); - namespace.insert(name, value); - - Ok(()) - } else { - Err(AmlError::AmlValueError) - } - } - - pub fn clean_namespace(&mut self) { - let mut namespace = ACPI_TABLE.namespace.write(); - - if let Some(ref mut 
namespace) = *namespace { - for k in &self.namespace_delta { - namespace.remove(k); - } - } - } - - pub fn init_arg_vars(&mut self, parameters: Vec) { - if parameters.len() > 8 { - return; - } - - let mut cur = 0; - while cur < parameters.len() { - self.arg_vars[cur] = parameters[cur].clone(); - cur += 1; - } - } - - pub fn prelock(&mut self) -> RwLockWriteGuard<'static, Option>> { - ACPI_TABLE.namespace.write() - } - - fn modify_local_obj(&mut self, local: usize, value: AmlValue) -> Result<(), AmlError> { - self.local_vars[local] = value.get_as_type(self.local_vars[local].clone())?; - Ok(()) - } - - fn modify_object(&mut self, name: String, value: AmlValue) -> Result<(), AmlError> { - if let Some(ref mut namespace) = *ACPI_TABLE.namespace.write() { - let coercion_obj = { - let obj = namespace.get(&name); - - if let Some(o) = obj { - o.clone() - } else { - AmlValue::Uninitialized - } - }; - - namespace.insert(name, value.get_as_type(coercion_obj)?); - Ok(()) - } else { - Err(AmlError::AmlHardFatal) - } - } - - fn modify_index_final(&mut self, name: String, value: AmlValue, indices: Vec) -> Result<(), AmlError> { - if let Some(ref mut namespace) = *ACPI_TABLE.namespace.write() { - let mut obj = if let Some(s) = namespace.get(&name) { - s.clone() - } else { - return Err(AmlError::AmlValueError); - }; - - obj = self.modify_index_core(obj, value, indices)?; - - namespace.insert(name, obj); - Ok(()) - } else { - Err(AmlError::AmlValueError) - } - } - - fn modify_index_core(&mut self, obj: AmlValue, value: AmlValue, indices: Vec) -> Result { - match obj { - AmlValue::String(ref string) => { - if indices.len() != 1 { - return Err(AmlError::AmlValueError); - } - - let mut bytes = string.clone().into_bytes(); - bytes[indices[0] as usize] = value.get_as_integer()? as u8; - - let string = String::from_utf8(bytes).unwrap(); - - Ok(AmlValue::String(string)) - }, - AmlValue::Buffer(ref b) => { - if indices.len() != 1 { - return Err(AmlError::AmlValueError); - } - - let mut b = b.clone(); - b[indices[0] as usize] = value.get_as_integer()? 
as u8; - - Ok(AmlValue::Buffer(b)) - }, - AmlValue::BufferField(ref b) => { - if indices.len() != 1 { - return Err(AmlError::AmlValueError); - } - - let mut idx = indices[0]; - idx += b.index.get_as_integer()?; - - let _ = self.modify(AmlValue::ObjectReference(ObjectReference::Index(b.source_buf.clone(), Box::new(AmlValue::Integer(idx.clone())))), value); - - Ok(AmlValue::BufferField(b.clone())) - }, - AmlValue::Package(ref p) => { - if indices.len() == 0 { - return Err(AmlError::AmlValueError); - } - - let mut p = p.clone(); - - if indices.len() == 1 { - p[indices[0] as usize] = value; - } else { - p[indices[0] as usize] = self.modify_index_core(p[indices[0] as usize].clone(), value, indices[1..].to_vec())?; - } - - Ok(AmlValue::Package(p)) - }, - _ => Err(AmlError::AmlValueError) - } - } - - pub fn modify_index(&mut self, name: AmlValue, value: AmlValue, indices: Vec) -> Result<(), AmlError>{ - match name { - AmlValue::ObjectReference(r) => match r { - ObjectReference::Object(s) => self.modify_index_final(s, value, indices), - ObjectReference::Index(c, v) => { - let mut indices = indices.clone(); - indices.push(v.get_as_integer()?); - - self.modify_index(*c, value, indices) - }, - ObjectReference::ArgObj(_) => Err(AmlError::AmlValueError), - ObjectReference::LocalObj(i) => { - let v = self.local_vars[i as usize].clone(); - self.local_vars[i as usize] = self.modify_index_core(v, value, indices)?; - - Ok(()) - } - }, - _ => Err(AmlError::AmlValueError) - } - } - - pub fn modify(&mut self, name: AmlValue, value: AmlValue) -> Result<(), AmlError> { - match name { - AmlValue::ObjectReference(r) => match r { - ObjectReference::ArgObj(_) => Err(AmlError::AmlValueError), - ObjectReference::LocalObj(i) => self.modify_local_obj(i as usize, value), - ObjectReference::Object(s) => self.modify_object(s, value), - ObjectReference::Index(c, v) => self.modify_index(*c, value, vec!(v.get_as_integer()?)) - }, - AmlValue::String(s) => self.modify_object(s, value), - _ => Err(AmlError::AmlValueError) - } - } - - fn copy_local_obj(&mut self, local: usize, value: AmlValue) -> Result<(), AmlError> { - self.local_vars[local] = value; - Ok(()) - } - - fn copy_object(&mut self, name: String, value: AmlValue) -> Result<(), AmlError> { - if let Some(ref mut namespace) = *ACPI_TABLE.namespace.write() { - namespace.insert(name, value); - Ok(()) - } else { - Err(AmlError::AmlHardFatal) - } - } - - pub fn copy(&mut self, name: AmlValue, value: AmlValue) -> Result<(), AmlError> { - match name { - AmlValue::ObjectReference(r) => match r { - ObjectReference::ArgObj(_) => Err(AmlError::AmlValueError), - ObjectReference::LocalObj(i) => self.copy_local_obj(i as usize, value), - ObjectReference::Object(s) => self.copy_object(s, value), - ObjectReference::Index(c, v) => self.modify_index(*c, value, vec!(v.get_as_integer()?)) - }, - AmlValue::String(s) => self.copy_object(s, value), - _ => Err(AmlError::AmlValueError) - } - } - - fn get_index_final(&self, name: String, indices: Vec) -> Result { - if let Some(ref namespace) = *ACPI_TABLE.namespace.read() { - let obj = if let Some(s) = namespace.get(&name) { - s.clone() - } else { - return Err(AmlError::AmlValueError); - }; - - self.get_index_core(obj, indices) - } else { - Err(AmlError::AmlValueError) - } - } - - fn get_index_core(&self, obj: AmlValue, indices: Vec) -> Result { - match obj { - AmlValue::String(ref string) => { - if indices.len() != 1 { - return Err(AmlError::AmlValueError); - } - - let bytes = string.clone().into_bytes(); - Ok(AmlValue::Integer(bytes[indices[0] 
as usize] as u64)) - }, - AmlValue::Buffer(ref b) => { - if indices.len() != 1 { - return Err(AmlError::AmlValueError); - } - - Ok(AmlValue::Integer(b[indices[0] as usize] as u64)) - }, - AmlValue::BufferField(ref b) => { - if indices.len() != 1 { - return Err(AmlError::AmlValueError); - } - - let mut idx = indices[0]; - idx += b.index.get_as_integer()?; - - Ok(AmlValue::Integer(b.source_buf.get_as_buffer()?[idx as usize] as u64)) - }, - AmlValue::Package(ref p) => { - if indices.len() == 0 { - return Err(AmlError::AmlValueError); - } - - if indices.len() == 1 { - Ok(p[indices[0] as usize].clone()) - } else { - self.get_index_core(p[indices[0] as usize].clone(), indices[1..].to_vec()) - } - }, - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get_index(&self, name: AmlValue, indices: Vec) -> Result{ - match name { - AmlValue::ObjectReference(r) => match r { - ObjectReference::Object(s) => self.get_index_final(s, indices), - ObjectReference::Index(c, v) => { - let mut indices = indices.clone(); - indices.push(v.get_as_integer()?); - - self.get_index(*c, indices) - }, - ObjectReference::ArgObj(_) => Err(AmlError::AmlValueError), - ObjectReference::LocalObj(i) => { - let v = self.local_vars[i as usize].clone(); - self.get_index_core(v, indices) - } - }, - _ => Err(AmlError::AmlValueError) - } - } - - pub fn get(&self, name: AmlValue) -> Result { - Ok(match name { - AmlValue::ObjectReference(r) => match r { - ObjectReference::ArgObj(i) => self.arg_vars[i as usize].clone(), - ObjectReference::LocalObj(i) => self.local_vars[i as usize].clone(), - ObjectReference::Object(ref s) => if let Some(ref namespace) = *ACPI_TABLE.namespace.read() { - if let Some(o) = namespace.get(s) { - o.clone() - } else { - AmlValue::None - } - } else { AmlValue::None }, - ObjectReference::Index(c, v) => self.get_index(*c, vec!(v.get_as_integer()?))?, - }, - AmlValue::String(ref s) => if let Some(ref namespace) = *ACPI_TABLE.namespace.read() { - if let Some(o) = namespace.get(s) { - o.clone() - } else { - AmlValue::None - } - } else { AmlValue::None }, - _ => AmlValue::None - }) - } -} diff --git a/src/acpi/aml/parsermacros.rs b/src/acpi/aml/parsermacros.rs deleted file mode 100644 index 31a23386..00000000 --- a/src/acpi/aml/parsermacros.rs +++ /dev/null @@ -1,52 +0,0 @@ -#[macro_export] -macro_rules! parser_selector { - {$data:expr, $ctx:expr, $func:expr} => { - match $func($data, $ctx) { - Ok(res) => return Ok(res), - Err(AmlError::AmlInvalidOpCode) => (), - Err(e) => return Err(e) - } - }; - {$data:expr, $ctx:expr, $func:expr, $($funcs:expr),+} => { - parser_selector! {$data, $ctx, $func}; - parser_selector! {$data, $ctx, $($funcs),*}; - }; -} - -#[macro_export] -macro_rules! parser_selector_simple { - {$data:expr, $func:expr} => { - match $func($data) { - Ok(res) => return Ok(res), - Err(AmlError::AmlInvalidOpCode) => (), - Err(e) => return Err(e) - } - }; - {$data:expr, $func:expr, $($funcs:expr),+} => { - parser_selector_simple! {$data, $func}; - parser_selector_simple! {$data, $($funcs),*}; - }; -} - -#[macro_export] -macro_rules! parser_opcode { - ($data:expr, $opcode:expr) => { - if $data[0] != $opcode { - return Err(AmlError::AmlInvalidOpCode); - } - }; - ($data:expr, $opcode:expr, $alternate_opcode:expr) => { - if $data[0] != $opcode && $data[0] != $alternate_opcode { - return Err(AmlError::AmlInvalidOpCode); - } - }; -} - -#[macro_export] -macro_rules! 
parser_opcode_extended { - ($data:expr, $opcode:expr) => { - if $data[0] != 0x5B || $data[1] != $opcode { - return Err(AmlError::AmlInvalidOpCode); - } - }; -} diff --git a/src/acpi/aml/pkglength.rs b/src/acpi/aml/pkglength.rs deleted file mode 100644 index 7b511f9b..00000000 --- a/src/acpi/aml/pkglength.rs +++ /dev/null @@ -1,25 +0,0 @@ -use super::AmlError; - -pub fn parse_pkg_length(data: &[u8]) -> Result<(usize, usize), AmlError> { - let lead_byte = data[0]; - let count_bytes: usize = (lead_byte >> 6) as usize; - - if count_bytes == 0 { - return Ok(((lead_byte & 0x3F) as usize, 1 as usize)); - } - - let upper_two = (lead_byte >> 4) & 0x03; - if upper_two != 0 { - return Err(AmlError::AmlParseError("Invalid package length")); - } - - let mut current_byte = 0; - let mut pkg_len: usize = (lead_byte & 0x0F) as usize; - - while current_byte < count_bytes { - pkg_len += (data[1 + current_byte] as u32 * 16 * (256 as u32).pow(current_byte as u32)) as usize; - current_byte += 1; - } - - Ok((pkg_len, count_bytes + 1)) -} diff --git a/src/acpi/aml/termlist.rs b/src/acpi/aml/termlist.rs deleted file mode 100644 index cd8e1033..00000000 --- a/src/acpi/aml/termlist.rs +++ /dev/null @@ -1,174 +0,0 @@ -use alloc::vec::Vec; - -use super::AmlError; -use super::parser::{ AmlParseType, ParseResult, AmlExecutionContext, ExecutionState }; -use super::namespace::{AmlValue, get_namespace_string}; -use super::namespacemodifier::parse_namespace_modifier; -use super::namedobj::parse_named_obj; -use super::dataobj::{parse_data_obj, parse_arg_obj, parse_local_obj}; -use super::type1opcode::parse_type1_opcode; -use super::type2opcode::parse_type2_opcode; -use super::namestring::parse_name_string; - -pub fn parse_term_list(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - let mut current_offset: usize = 0; - - while current_offset < data.len() { - let res = parse_term_obj(&data[current_offset..], ctx)?; - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: data.len() - }) - } - - current_offset += res.len; - } - - Ok(AmlParseType { - val: AmlValue::None, - len: data.len() - }) -} - -pub fn parse_term_arg(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_selector! { - data, ctx, - parse_local_obj, - parse_data_obj, - parse_arg_obj, - parse_type2_opcode - }; - - Err(AmlError::AmlInvalidOpCode) -} - -pub fn parse_object_list(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - let mut current_offset: usize = 0; - - while current_offset < data.len() { - let res = parse_object(&data[current_offset..], ctx)?; - - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: data.len() - }) - } - - current_offset += res.len; - } - - Ok(AmlParseType { - val: AmlValue::None, - len: data.len() - }) -} - -fn parse_object(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_selector! 
{ - data, ctx, - parse_namespace_modifier, - parse_named_obj - }; - - Err(AmlError::AmlInvalidOpCode) -} - -pub fn parse_method_invocation(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - let name = parse_name_string(data, ctx)?; - let method = ctx.get(name.val.clone())?; - - let method = match method { - AmlValue::None => return Err(AmlError::AmlDeferredLoad), - _ => method.get_as_method()? - }; - - let mut cur = 0; - let mut params: Vec = vec!(); - - let mut current_offset = name.len; - - while cur < method.arg_count { - let res = parse_term_arg(&data[current_offset..], ctx)?; - - current_offset += res.len; - cur += 1; - - params.push(res.val); - } - - Ok(AmlParseType { - val: method.execute(get_namespace_string(ctx.scope.clone(), name.val)?, params), - len: current_offset - }) -} - -fn parse_term_obj(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_selector! { - data, ctx, - parse_namespace_modifier, - parse_named_obj, - parse_type1_opcode, - parse_type2_opcode - }; - - Err(AmlError::AmlInvalidOpCode) -} diff --git a/src/acpi/aml/type1opcode.rs b/src/acpi/aml/type1opcode.rs deleted file mode 100644 index 2204e9c8..00000000 --- a/src/acpi/aml/type1opcode.rs +++ /dev/null @@ -1,472 +0,0 @@ -use super::AmlError; -use super::parser::{AmlParseType, ParseResult, AmlExecutionContext, ExecutionState}; -use super::namespace::AmlValue; -use super::pkglength::parse_pkg_length; -use super::termlist::{parse_term_arg, parse_term_list}; -use super::namestring::{parse_name_string, parse_super_name}; - -use time::monotonic; - -use acpi::{Sdt, load_table, get_sdt_signature}; -use super::{parse_aml_table, is_aml_table}; - -pub fn parse_type1_opcode(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_selector! 
{ - data, ctx, - parse_def_break, - parse_def_breakpoint, - parse_def_continue, - parse_def_noop, - parse_def_fatal, - parse_def_if_else, - parse_def_load, - parse_def_notify, - parse_def_release, - parse_def_reset, - parse_def_signal, - parse_def_sleep, - parse_def_stall, - parse_def_return, - parse_def_unload, - parse_def_while - }; - - Err(AmlError::AmlInvalidOpCode) -} - -fn parse_def_break(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0xA5); - ctx.state = ExecutionState::BREAK; - - Ok(AmlParseType { - val: AmlValue::None, - len: 1 as usize - }) -} - -fn parse_def_breakpoint(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0xCC); - - Ok(AmlParseType { - val: AmlValue::None, - len: 1 as usize - }) -} - -fn parse_def_continue(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x9F); - ctx.state = ExecutionState::CONTINUE; - - Ok(AmlParseType { - val: AmlValue::None, - len: 1 as usize - }) -} - -fn parse_def_noop(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0xA3); - - Ok(AmlParseType { - val: AmlValue::None, - len: 1 as usize - }) -} - -fn parse_def_fatal(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x32); - - let fatal_type = data[2]; - let fatal_code: u16 = (data[3] as u16) + ((data[4] as u16) << 8); - let fatal_arg = parse_term_arg(&data[5..], ctx)?; - - Err(AmlError::AmlFatalError(fatal_type, fatal_code, fatal_arg.val)) -} - -fn parse_def_load(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x20); - - let name = parse_name_string(&data[2..], ctx)?; - let ddb_handle_object = parse_super_name(&data[2 + name.len..], ctx)?; - - let tbl = ctx.get(name.val)?.get_as_buffer()?; - let sdt = unsafe { &*(tbl.as_ptr() as *const Sdt) }; - - if is_aml_table(sdt) { - load_table(get_sdt_signature(sdt)); - let delta = parse_aml_table(sdt)?; - let _ = ctx.modify(ddb_handle_object.val, AmlValue::DDBHandle((delta, get_sdt_signature(sdt)))); - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + name.len + ddb_handle_object.len - }) - } else { - Err(AmlError::AmlValueError) - } -} - -fn parse_def_notify(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x86); - - let object = parse_super_name(&data[1..], ctx)?; - let value = parse_term_arg(&data[1 + object.len..], ctx)?; - - let number = value.val.get_as_integer()? as u8; - - match ctx.get(object.val)? 
{ - AmlValue::Device(d) => { - if let Some(methods) = d.notify_methods.get(&number) { - for method in methods { - method(); - } - } - }, - AmlValue::Processor(d) => { - if let Some(methods) = d.notify_methods.get(&number) { - for method in methods { - method(); - } - } - }, - AmlValue::ThermalZone(d) => { - if let Some(methods) = d.notify_methods.get(&number) { - for method in methods { - method(); - } - } - }, - _ => return Err(AmlError::AmlValueError) - } - - Ok(AmlParseType { - val: AmlValue::None, - len: 1 + object.len + value.len - }) -} - -fn parse_def_release(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x27); - - let obj = parse_super_name(&data[2..], ctx)?; - let _ = ctx.release_mutex(obj.val); - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + obj.len - }) -} - -fn parse_def_reset(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x26); - - let object = parse_super_name(&data[2..], ctx)?; - ctx.get(object.val.clone())?.get_as_event()?; - - let _ = ctx.modify(object.val.clone(), AmlValue::Event(0)); - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + object.len - }) -} - -fn parse_def_signal(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x24); - let object = parse_super_name(&data[2..], ctx)?; - - ctx.signal_event(object.val)?; - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + object.len - }) -} - -fn parse_def_sleep(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x22); - - let time = parse_term_arg(&data[2..], ctx)?; - let timeout = time.val.get_as_integer()?; - - let (seconds, nanoseconds) = monotonic(); - let starting_time_ns = nanoseconds + (seconds * 1_000_000_000); - - loop { - let (seconds, nanoseconds) = monotonic(); - let current_time_ns = nanoseconds + (seconds * 1_000_000_000); - - if current_time_ns - starting_time_ns > timeout as u64 * 1_000_000 { - break; - } - } - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + time.len - }) -} - -fn parse_def_stall(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x21); - - let time = parse_term_arg(&data[2..], ctx)?; - let timeout = time.val.get_as_integer()?; - - let (seconds, nanoseconds) = monotonic(); - let starting_time_ns = nanoseconds + (seconds * 1_000_000_000); - - loop { - let (seconds, nanoseconds) = monotonic(); - let current_time_ns = nanoseconds + (seconds * 1_000_000_000); - - if current_time_ns - starting_time_ns > timeout as u64 * 1000 { - break; - } - } - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + time.len - }) -} - -fn parse_def_unload(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 
0 - }) - } - - parser_opcode_extended!(data, 0x2A); - - let object = parse_super_name(&data[2..], ctx)?; - - let delta = ctx.get(object.val)?.get_as_ddb_handle()?; - let mut namespace = ctx.prelock(); - - if let Some(ref mut ns) = *namespace { - for o in delta.0 { - ns.remove(&o); - } - } - - Ok(AmlParseType { - val: AmlValue::None, - len: 2 + object.len - }) -} - -fn parse_def_if_else(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0xA0); - - let (pkg_length, pkg_length_len) = parse_pkg_length(&data[1..])?; - let if_condition = parse_term_arg(&data[1 + pkg_length_len .. 1 + pkg_length], ctx)?; - - let (else_length, else_length_len) = if data.len() > 1 + pkg_length && data[1 + pkg_length] == 0xA1 { - parse_pkg_length(&data[2 + pkg_length..])? - } else { - (0 as usize, 0 as usize) - }; - - if if_condition.val.get_as_integer()? > 0 { - parse_term_list(&data[1 + pkg_length_len + if_condition.len .. 1 + pkg_length], ctx)?; - } else if else_length > 0 { - parse_term_list(&data[2 + pkg_length + else_length_len .. 2 + pkg_length + else_length], ctx)?; - } - - Ok(AmlParseType { - val: AmlValue::None, - len: 1 + pkg_length + if else_length > 0 { 1 + else_length } else { 0 } - }) -} - -fn parse_def_while(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0xA2); - - let (pkg_length, pkg_length_len) = parse_pkg_length(&data[1..])?; - - loop { - let predicate = parse_term_arg(&data[1 + pkg_length_len..], ctx)?; - if predicate.val.get_as_integer()? == 0 { - break; - } - - parse_term_list(&data[1 + pkg_length_len + predicate.len .. 
1 + pkg_length], ctx)?; - - match ctx.state { - ExecutionState::EXECUTING => (), - ExecutionState::BREAK => { - ctx.state = ExecutionState::EXECUTING; - break; - }, - ExecutionState::CONTINUE => ctx.state = ExecutionState::EXECUTING, - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - } - - Ok(AmlParseType { - val: AmlValue::None, - len: 1 + pkg_length - }) -} - -fn parse_def_return(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0xA4); - - let arg_object = parse_term_arg(&data[1..], ctx)?; - ctx.state = ExecutionState::RETURN(arg_object.val); - - Ok(AmlParseType { - val: AmlValue::None, - len: 1 + arg_object.len - }) -} diff --git a/src/acpi/aml/type2opcode.rs b/src/acpi/aml/type2opcode.rs deleted file mode 100644 index 9b4e7ef1..00000000 --- a/src/acpi/aml/type2opcode.rs +++ /dev/null @@ -1,1779 +0,0 @@ -use alloc::boxed::Box; -use alloc::string::String; -use alloc::vec::Vec; - -use super::{AmlError, parse_aml_with_scope}; -use super::parser::{AmlParseType, ParseResult, AmlExecutionContext, ExecutionState}; -use super::namespace::{AmlValue, ObjectReference}; -use super::pkglength::parse_pkg_length; -use super::termlist::{parse_term_arg, parse_method_invocation}; -use super::namestring::{parse_super_name, parse_target, parse_name_string, parse_simple_name}; -use super::dataobj::parse_data_ref_obj; - -use time::monotonic; -use acpi::SDT_POINTERS; - -#[derive(Debug, Clone)] -pub enum MatchOpcode { - MTR, - MEQ, - MLE, - MLT, - MGE, - MGT -} - -pub fn parse_type2_opcode(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_selector! { - data, ctx, - parse_def_increment, - parse_def_acquire, - parse_def_wait, - parse_def_land, - parse_def_lequal, - parse_def_lgreater, - parse_def_lless, - parse_def_lnot, - parse_def_lor, - parse_def_size_of, - parse_def_store, - parse_def_subtract, - parse_def_to_buffer, - parse_def_to_hex_string, - parse_def_to_bcd, - parse_def_to_decimal_string, - parse_def_to_integer, - parse_def_to_string, - parse_def_add, - parse_def_xor, - parse_def_shift_left, - parse_def_shift_right, - parse_def_mod, - parse_def_and, - parse_def_or, - parse_def_concat_res, - parse_def_concat, - parse_def_cond_ref_of, - parse_def_copy_object, - parse_def_decrement, - parse_def_divide, - parse_def_find_set_left_bit, - parse_def_find_set_right_bit, - parse_def_from_bcd, - parse_def_load_table, - parse_def_match, - parse_def_mid, - parse_def_multiply, - parse_def_nand, - parse_def_nor, - parse_def_not, - parse_def_timer, - parse_def_buffer, - parse_def_package, - parse_def_var_package, - parse_def_object_type, - parse_def_deref_of, - parse_def_ref_of, - parse_def_index, - parse_method_invocation - }; - - Err(AmlError::AmlInvalidOpCode) -} - -pub fn parse_type6_opcode(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_selector! 
{ - data, ctx, - parse_def_deref_of, - parse_def_ref_of, - parse_def_index, - parse_method_invocation - }; - - Err(AmlError::AmlInvalidOpCode) -} - -pub fn parse_def_object_type(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x8E); - parser_selector! { - data, ctx, - parse_super_name, - parse_def_ref_of, - parse_def_deref_of, - parse_def_index - } - - Err(AmlError::AmlInvalidOpCode) -} - -pub fn parse_def_package(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - // TODO: Handle deferred loads in here - parser_opcode!(data, 0x12); - - let (pkg_length, pkg_length_len) = parse_pkg_length(&data[1..])?; - let numelements = data[1 + pkg_length_len] as usize; - let mut elements = parse_package_elements_list(&data[2 + pkg_length_len .. 1 + pkg_length], ctx)?.val.get_as_package()?; - - if elements.len() > numelements { - elements = elements[0 .. numelements].to_vec(); - } else if numelements > elements.len() { - for _ in 0..numelements - elements.len() { - elements.push(AmlValue::Uninitialized); - } - } - - Ok(AmlParseType { - val: AmlValue::Package(elements), - len: 1 + pkg_length - }) -} - -pub fn parse_def_var_package(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - // TODO: Handle deferred loads in here - parser_opcode!(data, 0x13); - - let (pkg_length, pkg_length_len) = parse_pkg_length(&data[1..])?; - let num_elements = parse_term_arg(&data[1 + pkg_length_len .. 1 + pkg_length], ctx)?; - let mut elements = parse_package_elements_list(&data[1 + pkg_length_len + num_elements.len .. - 1 + pkg_length], ctx)?.val.get_as_package()?; - - let numelements = num_elements.val.get_as_integer()? as usize; - - if elements.len() > numelements { - elements = elements[0 .. 
numelements].to_vec(); - } else if numelements > elements.len() { - for _ in 0..numelements - elements.len() { - elements.push(AmlValue::Uninitialized); - } - } - - Ok(AmlParseType { - val: AmlValue::Package(elements), - len: 1 + pkg_length - }) -} - -fn parse_package_elements_list(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - let mut current_offset: usize = 0; - let mut elements: Vec = vec!(); - - while current_offset < data.len() { - let dro = if let Ok(e) = parse_data_ref_obj(&data[current_offset..], ctx) { - e - } else { - let d = parse_name_string(&data[current_offset..], ctx)?; - AmlParseType { - val: AmlValue::ObjectReference(ObjectReference::Object(d.val.get_as_string()?)), - len: d.len - } - }; - - elements.push(dro.val); - current_offset += dro.len; - } - - Ok(AmlParseType { - val: AmlValue::Package(elements), - len: data.len() - }) -} - -pub fn parse_def_buffer(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x11); - - let (pkg_length, pkg_length_len) = parse_pkg_length(&data[1..])?; - let buffer_size = parse_term_arg(&data[1 + pkg_length_len..], ctx)?; - let mut byte_list = data[1 + pkg_length_len + buffer_size.len .. 1 + pkg_length].to_vec().clone(); - - byte_list.truncate(buffer_size.val.get_as_integer()? as usize); - - Ok(AmlParseType { - val: AmlValue::Buffer(byte_list), - len: 1 + pkg_length - }) -} - -fn parse_def_ref_of(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x71); - - let obj = parse_super_name(&data[1..], ctx)?; - let res = match obj.val { - AmlValue::String(ref s) => { - match ctx.get(AmlValue::String(s.clone()))? 
{ - AmlValue::None => return Err(AmlError::AmlValueError), - _ => ObjectReference::Object(s.clone()) - } - }, - AmlValue::ObjectReference(ref o) => o.clone(), - _ => return Err(AmlError::AmlValueError) - }; - - Ok(AmlParseType { - val: AmlValue::ObjectReference(res), - len: 1 + obj.len - }) -} - -fn parse_def_deref_of(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x83); - - let obj = parse_term_arg(&data[1..], ctx)?; - let res = ctx.get(obj.val)?; - - match res { - AmlValue::None => Err(AmlError::AmlValueError), - _ => Ok(AmlParseType { - val: res, - len: 1 + obj.len - }) - } -} - -fn parse_def_acquire(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x23); - - let obj = parse_super_name(&data[1..], ctx)?; - let timeout = (data[2 + obj.len] as u16) + ((data[3 + obj.len] as u16) << 8); - - let (seconds, nanoseconds) = monotonic(); - let starting_time_ns = nanoseconds + (seconds * 1_000_000_000); - - loop { - match ctx.acquire_mutex(obj.val.clone()) { - Err(e) => return Err(e), - Ok(b) => if b { - return Ok(AmlParseType { - val: AmlValue::Integer(0), - len: 4 + obj.len - }); - } else if timeout == 0xFFFF { - // TODO: Brief sleep here - } else { - let (seconds, nanoseconds) = monotonic(); - let current_time_ns = nanoseconds + (seconds * 1_000_000_000); - - if current_time_ns - starting_time_ns > timeout as u64 * 1_000_000 { - return Ok(AmlParseType { - val: AmlValue::Integer(1), - len: 4 + obj.len - }); - } - } - } - } -} - -fn parse_def_increment(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x75); - - let obj = parse_super_name(&data[1..], ctx)?; - - let _namespace = ctx.prelock(); - let value = AmlValue::Integer(ctx.get(obj.val.clone())?.get_as_integer()? + 1); - let _ = ctx.modify(obj.val, value.clone()); - - Ok(AmlParseType { - val: value, - len: 1 + obj.len - }) -} - -fn parse_def_index(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x88); - - let obj = parse_term_arg(&data[1..], ctx)?; - let idx = parse_term_arg(&data[1 + obj.len..], ctx)?; - let target = parse_target(&data[1 + obj.len + idx.len..], ctx)?; - - let reference = AmlValue::ObjectReference(ObjectReference::Index(Box::new(obj.val), Box::new(idx.val))); - let _ = ctx.modify(target.val, reference.clone()); - - Ok(AmlParseType { - val: reference, - len: 1 + obj.len + idx.len + target.len - }) -} - -fn parse_def_land(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x90); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - - let result = if lhs.val.get_as_integer()? > 0 && rhs.val.get_as_integer()? 
> 0 { 1 } else { 0 }; - - Ok(AmlParseType { - val: AmlValue::IntegerConstant(result), - len: 1 + lhs.len + rhs.len - }) -} - -fn parse_def_lequal(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x93); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - - let result = if lhs.val.get_as_integer()? == rhs.val.get_as_integer()? { 1 } else { 0 }; - - Ok(AmlParseType { - val: AmlValue::IntegerConstant(result), - len: 1 + lhs.len + rhs.len - }) -} - -fn parse_def_lgreater(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x94); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - - let result = if lhs.val.get_as_integer()? > rhs.val.get_as_integer()? { 1 } else { 0 }; - - Ok(AmlParseType { - val: AmlValue::IntegerConstant(result), - len: 1 + lhs.len + rhs.len - }) -} - -fn parse_def_lless(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x95); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - - let result = if lhs.val.get_as_integer()? < rhs.val.get_as_integer()? { 1 } else { 0 }; - - Ok(AmlParseType { - val: AmlValue::IntegerConstant(result), - len: 1 + lhs.len + rhs.len - }) -} - -fn parse_def_lnot(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x92); - - let operand = parse_term_arg(&data[1..], ctx)?; - let result = if operand.val.get_as_integer()? == 0 { 1 } else { 0 }; - - Ok(AmlParseType { - val: AmlValue::IntegerConstant(result), - len: 1 + operand.len - }) -} - -fn parse_def_lor(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x91); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - - let result = if lhs.val.get_as_integer()? > 0 || rhs.val.get_as_integer()? > 0 { 1 } else { 0 }; - - Ok(AmlParseType { - val: AmlValue::IntegerConstant(result), - len: 1 + lhs.len + rhs.len - }) -} - -fn parse_def_to_hex_string(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x98); - - let operand = parse_term_arg(&data[2..], ctx)?; - let target = parse_target(&data[2 + operand.len..], ctx)?; - - let res = match operand.val { - AmlValue::Integer(_) => { - let result: String = format!("{:X}", operand.val.get_as_integer()?); - AmlValue::String(result) - }, - AmlValue::String(s) => AmlValue::String(s), - AmlValue::Buffer(_) => { - let mut string: String = String::new(); - - for b in operand.val.get_as_buffer()? 
{ - string.push_str(&format!("{:X}", b)); - } - - AmlValue::String(string) - }, - _ => return Err(AmlError::AmlValueError) - }; - - let _ = ctx.modify(target.val, res.clone()); - - Ok(AmlParseType { - val: res, - len: 1 + operand.len + target.len - }) -} - -fn parse_def_to_buffer(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x96); - - let operand = parse_term_arg(&data[2..], ctx)?; - let target = parse_target(&data[2 + operand.len..], ctx)?; - - let res = AmlValue::Buffer(operand.val.get_as_buffer()?); - let _ = ctx.modify(target.val, res.clone()); - - Ok(AmlParseType { - val: res, - len: 1 + operand.len + target.len - }) -} - -fn parse_def_to_bcd(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x29); - - let operand = parse_term_arg(&data[2..], ctx)?; - let target = parse_target(&data[2 + operand.len..], ctx)?; - - let mut i = operand.val.get_as_integer()?; - let mut result = 0; - - while i != 0 { - result <<= 4; - result += i % 10; - i /= 10; - } - - let result = AmlValue::Integer(result); - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + operand.len + target.len - }) -} - -fn parse_def_to_decimal_string(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x97); - - let operand = parse_term_arg(&data[2..], ctx)?; - let target = parse_target(&data[2 + operand.len..], ctx)?; - let res = match operand.val { - AmlValue::Integer(_) => { - let result: String = format!("{}", operand.val.get_as_integer()?); - AmlValue::String(result) - }, - AmlValue::String(s) => AmlValue::String(s), - AmlValue::Buffer(_) => { - let mut string: String = String::new(); - - for b in operand.val.get_as_buffer()? 
{ - string.push_str(&format!("{}", b)); - } - - AmlValue::String(string) - }, - _ => return Err(AmlError::AmlValueError) - }; - - let _ = ctx.modify(target.val, res.clone()); - - Ok(AmlParseType { - val: res, - len: 1 + operand.len + target.len - }) -} - -fn parse_def_to_integer(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x99); - - let operand = parse_term_arg(&data[2..], ctx)?; - let target = parse_target(&data[2 + operand.len..], ctx)?; - - let res = AmlValue::Integer(operand.val.get_as_integer()?); - - let _ = ctx.modify(target.val, res.clone()); - - Ok(AmlParseType { - val: res, - len: 1 + operand.len + target.len - }) -} - -fn parse_def_to_string(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x9C); - - let operand = parse_term_arg(&data[1..], ctx)?; - let length = parse_term_arg(&data[1 + operand.len..], ctx)?; - let target = parse_target(&data[1 + operand.len + length.len..], ctx)?; - - let buf = operand.val.get_as_buffer()?; - let mut string = match String::from_utf8(buf) { - Ok(s) => s, - Err(_) => return Err(AmlError::AmlValueError) - }; - - string.truncate(length.val.get_as_integer()? as usize); - let res = AmlValue::String(string); - - let _ = ctx.modify(target.val, res.clone()); - - Ok(AmlParseType { - val: res, - len: 1 + operand.len + length.len + target.len - }) -} - -fn parse_def_subtract(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x74); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - - let result = AmlValue::Integer(lhs.val.get_as_integer()? 
- rhs.val.get_as_integer()?); - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + lhs.len + rhs.len + target.len - }) -} - -fn parse_def_size_of(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x87); - - let name = parse_super_name(&data[1..], ctx)?; - let obj = ctx.get(name.val)?; - - let res = match obj { - AmlValue::Buffer(ref v) => v.len(), - AmlValue::String(ref s) => s.len(), - AmlValue::Package(ref p) => p.len(), - _ => return Err(AmlError::AmlValueError) - }; - - Ok(AmlParseType { - val: AmlValue::Integer(res as u64), - len: 1 + name.len - }) -} - -fn parse_def_store(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x70); - - let operand = parse_term_arg(&data[1..], ctx)?; - let target = parse_super_name(&data[1 + operand.len..], ctx)?; - - let _ = ctx.modify(target.val.clone(), operand.val); - - Ok(AmlParseType { - val: target.val, - len: 1 + operand.len + target.len - }) -} - -fn parse_def_or(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x7D); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - - let result = AmlValue::Integer(lhs.val.get_as_integer()? | rhs.val.get_as_integer()?); - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + lhs.len + rhs.len + target.len - }) -} - -fn parse_def_shift_left(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x79); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - - let result = AmlValue::Integer(lhs.val.get_as_integer()? >> rhs.val.get_as_integer()?); - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + lhs.len + rhs.len + target.len - }) -} - -fn parse_def_shift_right(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x7A); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - - let result = AmlValue::Integer(lhs.val.get_as_integer()? 
<< rhs.val.get_as_integer()?); - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + lhs.len + rhs.len + target.len - }) -} - -fn parse_def_add(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x72); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - - let result = AmlValue::Integer(lhs.val.get_as_integer()? + rhs.val.get_as_integer()?); - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + lhs.len + rhs.len + target.len - }) -} - -fn parse_def_and(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x7B); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - - let result = AmlValue::Integer(lhs.val.get_as_integer()? & rhs.val.get_as_integer()?); - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + lhs.len + rhs.len + target.len - }) -} - -fn parse_def_xor(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x7F); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - - let result = AmlValue::Integer(lhs.val.get_as_integer()? 
^ rhs.val.get_as_integer()?); - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + lhs.len + rhs.len + target.len - }) -} - -fn parse_def_concat_res(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x84); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - - let mut buf1 = lhs.val.get_as_buffer()?.clone(); - let mut buf2 = rhs.val.get_as_buffer()?.clone(); - - if buf1.len() == 1 || buf2.len() == 1 { - return Err(AmlError::AmlValueError); - } - - if buf1.len() >= 2 && buf1[buf1.len() - 2] == 0x79 { - buf1 = buf1[0..buf1.len() - 2].to_vec(); - } - - if buf2.len() >= 2 && buf2[buf2.len() - 2] == 0x79 { - buf2 = buf2[0..buf2.len() - 2].to_vec(); - } - - buf1.append(&mut buf2); - buf1.push(0x79); - - let mut checksum: u8 = 0; - let loopbuf = buf1.clone(); - for b in loopbuf { - checksum += b; - } - - checksum = (!checksum) + 1; - buf1.push(checksum); - - let res = AmlValue::Buffer(buf1); - ctx.modify(target.val, res.clone())?; - - Ok(AmlParseType { - val: res, - len: 1 + lhs.len + rhs.len + target.len - }) -} - -fn parse_def_wait(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x25); - - let obj = parse_super_name(&data[2..], ctx)?; - let timeout_obj = parse_term_arg(&data[2 + obj.len..], ctx)?; - - let timeout = timeout_obj.val.get_as_integer()?; - - let (seconds, nanoseconds) = monotonic(); - let starting_time_ns = nanoseconds + (seconds * 1_000_000_000); - - loop { - match ctx.wait_for_event(obj.val.clone()) { - Err(e) => return Err(e), - Ok(b) => if b { - return Ok(AmlParseType { - val: AmlValue::Integer(0), - len: 2 + obj.len + timeout_obj.len - }) - } else if timeout >= 0xFFFF { - // TODO: Brief sleep here - } else { - let (seconds, nanoseconds) = monotonic(); - let current_time_ns = nanoseconds + (seconds * 1_000_000_000); - - if current_time_ns - starting_time_ns > timeout as u64 * 1_000_000 { - return Ok(AmlParseType { - val: AmlValue::Integer(1), - len: 2 + obj.len + timeout_obj.len - }); - } - } - } - } -} - -fn parse_def_cond_ref_of(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x12); - - let obj = parse_super_name(&data[2..], ctx)?; - let target = parse_target(&data[2 + obj.len..], ctx)?; - - let res = match obj.val { - AmlValue::String(ref s) => { - match ctx.get(AmlValue::String(s.clone()))? 
{ - AmlValue::None => return Ok(AmlParseType { - val: AmlValue::Integer(0), - len: 1 + obj.len + target.len - }), - _ => ObjectReference::Object(s.clone()) - } - }, - AmlValue::ObjectReference(ref o) => o.clone(), - _ => return Err(AmlError::AmlValueError) - }; - - let _ = ctx.modify(target.val, AmlValue::ObjectReference(res)); - - Ok(AmlParseType { - val: AmlValue::Integer(1), - len: 1 + obj.len + target.len - }) -} - -fn parse_def_copy_object(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - // TODO: Compute the result - // TODO: Store the result - parser_opcode!(data, 0x9D); - - let source = parse_term_arg(&data[1..], ctx)?; - let destination = parse_simple_name(&data[1 + source.len..], ctx)?; - - ctx.copy(destination.val, source.val.clone())?; - - Ok(AmlParseType { - val: source.val, - len: 1 + source.len + destination.len - }) -} - -fn parse_def_concat(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x73); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - - let result = match lhs.val { - AmlValue::Integer(_i) => { - let j = AmlValue::Integer(rhs.val.get_as_integer()?); - - let mut first = lhs.val.get_as_buffer()?.clone(); - let mut second = j.get_as_buffer()?.clone(); - - first.append(&mut second); - - AmlValue::Buffer(first) - }, - AmlValue::String(s) => { - let t = if let Ok(t) = rhs.val.get_as_string() { - t - } else { - rhs.val.get_type_string() - }; - - AmlValue::String(format!("{}{}", s, t)) - }, - AmlValue::Buffer(b) => { - let mut b = b.clone(); - let mut c = if let Ok(c) = rhs.val.get_as_buffer() { - c.clone() - } else { - AmlValue::String(rhs.val.get_type_string()).get_as_buffer()?.clone() - }; - - b.append(&mut c); - - AmlValue::Buffer(b) - }, - _ => { - let first = lhs.val.get_type_string(); - let second = if let Ok(second) = rhs.val.get_as_string() { - second - } else { - rhs.val.get_type_string() - }; - - AmlValue::String(format!("{}{}", first, second)) - } - }; - - ctx.modify(target.val, result.clone())?; - - Ok(AmlParseType { - val: result, - len: 1 + lhs.len + rhs.len + target.len - }) -} - -fn parse_def_decrement(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x76); - - let obj = parse_super_name(&data[1..], ctx)?; - - let _namespace = ctx.prelock(); - let value = AmlValue::Integer(ctx.get(obj.val.clone())?.get_as_integer()? 
- 1); - let _ = ctx.modify(obj.val, value.clone()); - - Ok(AmlParseType { - val: value, - len: 1 + obj.len - }) -} - -fn parse_def_divide(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x78); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target_remainder = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - let target_quotient = parse_target(&data[1 + lhs.len + rhs.len + target_remainder.len..], ctx)?; - - let numerator = lhs.val.get_as_integer()?; - let denominator = rhs.val.get_as_integer()?; - - let remainder = numerator % denominator; - let quotient = (numerator - remainder) / denominator; - - let _ = ctx.modify(target_remainder.val, AmlValue::Integer(remainder)); - let _ = ctx.modify(target_quotient.val, AmlValue::Integer(quotient)); - - Ok(AmlParseType { - val: AmlValue::Integer(quotient), - len: 1 + lhs.len + rhs.len + target_remainder.len + target_quotient.len - }) -} - -fn parse_def_find_set_left_bit(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x81); - - let operand = parse_term_arg(&data[2..], ctx)?; - let target = parse_target(&data[2 + operand.len..], ctx)?; - - let mut first_bit = 32; - let mut test = operand.val.get_as_integer()?; - - while first_bit > 0{ - if test & 0x8000_0000_0000_0000 > 0 { - break; - } - - test <<= 1; - first_bit -= 1; - } - - let result = AmlValue::Integer(first_bit); - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + operand.len + target.len - }) -} - -fn parse_def_find_set_right_bit(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x82); - - let operand = parse_term_arg(&data[2..], ctx)?; - let target = parse_target(&data[2 + operand.len..], ctx)?; - - let mut first_bit = 1; - let mut test = operand.val.get_as_integer()?; - - while first_bit <= 32 { - if test & 1 > 0 { - break; - } - - test >>= 1; - first_bit += 1; - } - - if first_bit == 33 { - first_bit = 0; - } - - let result = AmlValue::Integer(first_bit); - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + operand.len + target.len - }) -} - -fn parse_def_load_table(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - // TODO: Clean up - parser_opcode_extended!(data, 0x1F); - - let signature = parse_term_arg(&data[2..], ctx)?; - let oem_id = parse_term_arg(&data[2 + signature.len..], ctx)?; - let oem_table_id = parse_term_arg(&data[2 + signature.len + oem_id.len..], ctx)?; - let root_path = parse_term_arg(&data[2 + signature.len + oem_id.len + oem_table_id.len..], ctx)?; - let parameter_path = parse_term_arg(&data[2 + signature.len + oem_id.len + oem_table_id.len + root_path.len..], ctx)?; - let parameter_data = parse_term_arg(&data[2 + signature.len + oem_id.len + oem_table_id.len + root_path.len + parameter_path.len..], ctx)?; - - if let Some(ref ptrs) = *(SDT_POINTERS.read()) { - let sig_str = unsafe 
{ - let sig = *(signature.val.get_as_string()?.as_bytes().as_ptr() as *const [u8; 4]); - String::from_utf8(sig.to_vec()).expect("Error converting signature to string") - }; - let oem_str = unsafe { - *(oem_id.val.get_as_string()?.as_bytes().as_ptr() as *const [u8; 6]) - }; - let oem_table_str = unsafe { - *(oem_table_id.val.get_as_string()?.as_bytes().as_ptr() as *const [u8; 8]) - }; - - let sdt_signature = (sig_str, oem_str, oem_table_str); - - let sdt = ptrs.get(&sdt_signature); - - if let Some(sdt) = sdt { - let hdl = parse_aml_with_scope(sdt, root_path.val.get_as_string()?)?; - let _ = ctx.modify(parameter_path.val, parameter_data.val); - - return Ok(AmlParseType { - val: AmlValue::DDBHandle((hdl, sdt_signature)), - len: 2 + signature.len + oem_id.len + oem_table_id.len + root_path.len + parameter_path.len + parameter_data.len - }); - } - } - - Ok(AmlParseType { - val: AmlValue::IntegerConstant(0), - len: 2 + signature.len + oem_id.len + oem_table_id.len + root_path.len + parameter_path.len + parameter_data.len - }) -} - -fn parse_def_match(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x28); - - let search_pkg = parse_term_arg(&data[1..], ctx)?; - - let first_operation = match data[1 + search_pkg.len] { - 0 => MatchOpcode::MTR, - 1 => MatchOpcode::MEQ, - 2 => MatchOpcode::MLE, - 3 => MatchOpcode::MLT, - 4 => MatchOpcode::MGE, - 5 => MatchOpcode::MGT, - _ => return Err(AmlError::AmlParseError("DefMatch - Invalid Opcode")) - }; - let first_operand = parse_term_arg(&data[2 + search_pkg.len..], ctx)?; - - let second_operation = match data[2 + search_pkg.len + first_operand.len] { - 0 => MatchOpcode::MTR, - 1 => MatchOpcode::MEQ, - 2 => MatchOpcode::MLE, - 3 => MatchOpcode::MLT, - 4 => MatchOpcode::MGE, - 5 => MatchOpcode::MGT, - _ => return Err(AmlError::AmlParseError("DefMatch - Invalid Opcode")) - }; - let second_operand = parse_term_arg(&data[3 + search_pkg.len + first_operand.len..], ctx)?; - - let start_index = parse_term_arg(&data[3 + search_pkg.len + first_operand.len + second_operand.len..], ctx)?; - - let pkg = search_pkg.val.get_as_package()?; - let mut idx = start_index.val.get_as_integer()? 
as usize; - - match first_operand.val { - AmlValue::Integer(i) => { - let j = second_operand.val.get_as_integer()?; - - while idx < pkg.len() { - let val = if let Ok(v) = pkg[idx].get_as_integer() { v } else { idx += 1; continue; }; - idx += 1; - - match first_operation { - MatchOpcode::MTR => (), - MatchOpcode::MEQ => if val != i { continue }, - MatchOpcode::MLE => if val > i { continue }, - MatchOpcode::MLT => if val >= i { continue }, - MatchOpcode::MGE => if val < i { continue }, - MatchOpcode::MGT => if val <= i { continue } - } - - match second_operation { - MatchOpcode::MTR => (), - MatchOpcode::MEQ => if val != j { continue }, - MatchOpcode::MLE => if val > j { continue }, - MatchOpcode::MLT => if val >= j { continue }, - MatchOpcode::MGE => if val < j { continue }, - MatchOpcode::MGT => if val <= j { continue } - } - - return Ok(AmlParseType { - val: AmlValue::Integer(idx as u64), - len: 3 + search_pkg.len + first_operand.len + second_operand.len + start_index.len - }) - } - }, - AmlValue::String(i) => { - let j = second_operand.val.get_as_string()?; - - while idx < pkg.len() { - let val = if let Ok(v) = pkg[idx].get_as_string() { v } else { idx += 1; continue; }; - idx += 1; - - match first_operation { - MatchOpcode::MTR => (), - MatchOpcode::MEQ => if val != i { continue }, - MatchOpcode::MLE => if val > i { continue }, - MatchOpcode::MLT => if val >= i { continue }, - MatchOpcode::MGE => if val < i { continue }, - MatchOpcode::MGT => if val <= i { continue } - } - - match second_operation { - MatchOpcode::MTR => (), - MatchOpcode::MEQ => if val != j { continue }, - MatchOpcode::MLE => if val > j { continue }, - MatchOpcode::MLT => if val >= j { continue }, - MatchOpcode::MGE => if val < j { continue }, - MatchOpcode::MGT => if val <= j { continue } - } - - return Ok(AmlParseType { - val: AmlValue::Integer(idx as u64), - len: 3 + search_pkg.len + first_operand.len + second_operand.len + start_index.len - }) - } - }, - _ => { - let i = first_operand.val.get_as_buffer()?; - let j = second_operand.val.get_as_buffer()?; - - while idx < pkg.len() { - let val = if let Ok(v) = pkg[idx].get_as_buffer() { v } else { idx += 1; continue; }; - idx += 1; - - match first_operation { - MatchOpcode::MTR => (), - MatchOpcode::MEQ => if val != i { continue }, - MatchOpcode::MLE => if val > i { continue }, - MatchOpcode::MLT => if val >= i { continue }, - MatchOpcode::MGE => if val < i { continue }, - MatchOpcode::MGT => if val <= i { continue } - } - - match second_operation { - MatchOpcode::MTR => (), - MatchOpcode::MEQ => if val != j { continue }, - MatchOpcode::MLE => if val > j { continue }, - MatchOpcode::MLT => if val >= j { continue }, - MatchOpcode::MGE => if val < j { continue }, - MatchOpcode::MGT => if val <= j { continue } - } - - return Ok(AmlParseType { - val: AmlValue::Integer(idx as u64), - len: 3 + search_pkg.len + first_operand.len + second_operand.len + start_index.len - }) - } - } - } - - Ok(AmlParseType { - val: AmlValue::IntegerConstant(0xFFFF_FFFF_FFFF_FFFF), - len: 3 + search_pkg.len + first_operand.len + second_operand.len + start_index.len - }) -} - -fn parse_def_from_bcd(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x28); - - let operand = parse_term_arg(&data[2..], ctx)?; - let target = parse_target(&data[2 + operand.len..], ctx)?; - - let mut i = operand.val.get_as_integer()?; - let mut 
result = 0; - - while i != 0 { - if i & 0x0F > 10 { - return Err(AmlError::AmlValueError); - } - - result *= 10; - result += i & 0x0F; - i >>= 4; - } - - let result = AmlValue::Integer(result); - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 2 + operand.len + target.len - }) -} - -fn parse_def_mid(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x9E); - - let source = parse_term_arg(&data[1..], ctx)?; - let index = parse_term_arg(&data[1 + source.len..], ctx)?; - let length = parse_term_arg(&data[1 + source.len + index.len..], ctx)?; - let target = parse_target(&data[1 + source.len + index.len + length.len..], ctx)?; - - let idx = index.val.get_as_integer()? as usize; - let mut len = length.val.get_as_integer()? as usize; - - let result = match source.val { - AmlValue::String(s) => { - if idx > s.len() { - AmlValue::String(String::new()) - } else { - let mut res = s.clone().split_off(idx); - - if len < res.len() { - res.split_off(len); - } - - AmlValue::String(res) - } - }, - _ => { - // If it isn't a string already, treat it as a buffer. Must perform that check first, - // as Mid can operate on both strings and buffers, but a string can be cast as a buffer - // implicitly. - // Additionally, any type that can be converted to a buffer can also be converted to a - // string, so no information is lost - let b = source.val.get_as_buffer()?; - - if idx > b.len() { - AmlValue::Buffer(vec!()) - } else { - if idx + len > b.len() { - len = b.len() - idx; - } - - AmlValue::Buffer(b[idx .. idx + len].to_vec()) - } - } - }; - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + source.len + index.len + length.len + target.len - }) -} - -fn parse_def_mod(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x85); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - - if rhs.val.get_as_integer()? == 0 { - return Err(AmlError::AmlValueError); - } - - let result = AmlValue::Integer(lhs.val.get_as_integer()? % rhs.val.get_as_integer()?); - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + lhs.len + rhs.len + target.len - }) -} - -fn parse_def_multiply(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - // TODO: Handle overflow - parser_opcode!(data, 0x77); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - - let result = AmlValue::Integer(lhs.val.get_as_integer()? 
* rhs.val.get_as_integer()?); - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + lhs.len + rhs.len + target.len - }) -} - -fn parse_def_nand(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x7C); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - - let result = AmlValue::Integer(!(lhs.val.get_as_integer()? & rhs.val.get_as_integer()?)); - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + lhs.len + rhs.len + target.len - }) -} - -fn parse_def_nor(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x7E); - - let lhs = parse_term_arg(&data[1..], ctx)?; - let rhs = parse_term_arg(&data[1 + lhs.len..], ctx)?; - let target = parse_target(&data[1 + lhs.len + rhs.len..], ctx)?; - - let result = AmlValue::Integer(!(lhs.val.get_as_integer()? | rhs.val.get_as_integer()?)); - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + lhs.len + rhs.len + target.len - }) -} - -fn parse_def_not(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode!(data, 0x80); - - let operand = parse_term_arg(&data[1..], ctx)?; - let target = parse_target(&data[1 + operand.len..], ctx)?; - - let result = AmlValue::Integer(!operand.val.get_as_integer()?); - - let _ = ctx.modify(target.val, result.clone()); - - Ok(AmlParseType { - val: result, - len: 1 + operand.len + target.len - }) -} - -fn parse_def_timer(data: &[u8], - ctx: &mut AmlExecutionContext) -> ParseResult { - match ctx.state { - ExecutionState::EXECUTING => (), - _ => return Ok(AmlParseType { - val: AmlValue::None, - len: 0 - }) - } - - parser_opcode_extended!(data, 0x33); - - let (seconds, nanoseconds) = monotonic(); - let monotonic_ns = nanoseconds + (seconds * 1_000_000_000); - - Ok(AmlParseType { - val: AmlValue::Integer(monotonic_ns), - len: 2 as usize - }) -} diff --git a/src/acpi/dmar/drhd.rs b/src/acpi/dmar/drhd.rs deleted file mode 100644 index 494917ef..00000000 --- a/src/acpi/dmar/drhd.rs +++ /dev/null @@ -1,77 +0,0 @@ -#[repr(packed)] -pub struct DrhdFault { - pub sts: u32, - pub ctrl: u32, - pub data: u32, - pub addr: [u32; 2], - _rsv: [u64; 2], - pub log: u64, -} - -#[repr(packed)] -pub struct DrhdProtectedMemory { - pub en: u32, - pub low_base: u32, - pub low_limit: u32, - pub high_base: u64, - pub high_limit: u64, -} - -#[repr(packed)] -pub struct DrhdInvalidation { - pub queue_head: u64, - pub queue_tail: u64, - pub queue_addr: u64, - _rsv: u32, - pub cmpl_sts: u32, - pub cmpl_ctrl: u32, - pub cmpl_data: u32, - pub cmpl_addr: [u32; 2], -} - -#[repr(packed)] -pub struct DrhdPageRequest { - pub queue_head: u64, - pub queue_tail: u64, - pub queue_addr: u64, - _rsv: u32, - pub sts: u32, - pub ctrl: u32, - pub data: u32, - pub addr: [u32; 2], -} - -#[repr(packed)] -pub struct DrhdMtrrVariable { - pub base: u64, - pub mask: u64, -} - -#[repr(packed)] -pub struct DrhdMtrr { - pub cap: u64, - pub def_type: u64, - pub 
fixed: [u64; 11], - pub variable: [DrhdMtrrVariable; 10], -} - -#[repr(packed)] -pub struct Drhd { - pub version: u32, - _rsv: u32, - pub cap: u64, - pub ext_cap: u64, - pub gl_cmd: u32, - pub gl_sts: u32, - pub root_table: u64, - pub ctx_cmd: u64, - _rsv1: u32, - pub fault: DrhdFault, - _rsv2: u32, - pub pm: DrhdProtectedMemory, - pub invl: DrhdInvalidation, - _rsv3: u64, - pub intr_table: u64, - pub page_req: DrhdPageRequest, - pub mtrr: DrhdMtrr, -} diff --git a/src/acpi/dmar/mod.rs b/src/acpi/dmar/mod.rs deleted file mode 100644 index 11687181..00000000 --- a/src/acpi/dmar/mod.rs +++ /dev/null @@ -1,218 +0,0 @@ -use core::mem; - -use super::sdt::Sdt; -use self::drhd::Drhd; -use memory::Frame; -use paging::{ActivePageTable, PhysicalAddress}; -use paging::entry::EntryFlags; - -use super::{find_sdt, load_table, get_sdt_signature}; - -pub mod drhd; - -/// The DMA Remapping Table -#[derive(Debug)] -pub struct Dmar { - sdt: &'static Sdt, - pub addr_width: u8, - pub flags: u8, - _rsv: [u8; 10], -} - -impl Dmar { - pub fn init(active_table: &mut ActivePageTable) { - let dmar_sdt = find_sdt("DMAR"); - let dmar = if dmar_sdt.len() == 1 { - load_table(get_sdt_signature(dmar_sdt[0])); - Dmar::new(dmar_sdt[0]) - } else { - println!("Unable to find DMAR"); - return; - }; - - if let Some(dmar) = dmar { - println!(" DMAR: {}: {}", dmar.addr_width, dmar.flags); - - for dmar_entry in dmar.iter() { - println!(" {:?}", dmar_entry); - match dmar_entry { - DmarEntry::Drhd(dmar_drhd) => { - let drhd = dmar_drhd.get(active_table); - - println!("VER: {:X}", drhd.version); - println!("CAP: {:X}", drhd.cap); - println!("EXT_CAP: {:X}", drhd.ext_cap); - println!("GCMD: {:X}", drhd.gl_cmd); - println!("GSTS: {:X}", drhd.gl_sts); - println!("RT: {:X}", drhd.root_table); - }, - _ => () - } - } - } - } - - pub fn new(sdt: &'static Sdt) -> Option { - if &sdt.signature == b"DMAR" && sdt.data_len() >= 12 { //Not valid if no local address and flags - let addr_width = unsafe { *(sdt.data_address() as *const u8) }; - let flags = unsafe { *(sdt.data_address() as *const u8).offset(1) }; - let rsv: [u8; 10] = unsafe { *((sdt.data_address() as *const u8).offset(2) as *const [u8; 10]) }; - - Some(Dmar { - sdt: sdt, - addr_width: addr_width, - flags: flags, - _rsv: rsv, - }) - } else { - None - } - } - - pub fn iter(&self) -> DmarIter { - DmarIter { - sdt: self.sdt, - i: 12 // Skip address width and flags - } - } -} - -/// - -/// DMAR DMA Remapping Hardware Unit Definition -// TODO: Implement iterator on DmarDrhd scope -#[derive(Debug)] -#[repr(packed)] -pub struct DmarDrhd { - kind: u16, - length: u16, - flags: u8, - _rsv: u8, - segment: u16, - base: u64, -} - -impl DmarDrhd { - pub fn get(&self, active_table: &mut ActivePageTable) -> &'static mut Drhd { - let result = active_table.identity_map(Frame::containing_address(PhysicalAddress::new(self.base as usize)), EntryFlags::PRESENT | EntryFlags::WRITABLE | EntryFlags::NO_EXECUTE); - result.flush(active_table); - unsafe { &mut *(self.base as *mut Drhd) } - } -} - -/// DMAR Reserved Memory Region Reporting -// TODO: Implement iterator on DmarRmrr scope -#[derive(Debug)] -#[repr(packed)] -pub struct DmarRmrr { - kind: u16, - length: u16, - _rsv: u16, - segment: u16, - base: u64, - limit: u64, -} - -/// DMAR Root Port ATS Capability Reporting -// TODO: Implement iterator on DmarAtsr scope -#[derive(Debug)] -#[repr(packed)] -pub struct DmarAtsr { - kind: u16, - length: u16, - flags: u8, - _rsv: u8, - segment: u16, -} - -/// DMAR Remapping Hardware Static Affinity -#[derive(Debug)] 
-#[repr(packed)] -pub struct DmarRhsa { - kind: u16, - length: u16, - _rsv: u32, - base: u64, - domain: u32, -} - -/// DMAR ACPI Name-space Device Declaration -// TODO: Implement iterator on DmarAndd object name -#[derive(Debug)] -#[repr(packed)] -pub struct DmarAndd { - kind: u16, - length: u16, - _rsv: [u8; 3], - acpi_dev: u8, -} - -/// DMAR Entries -#[derive(Debug)] -pub enum DmarEntry { - Drhd(&'static DmarDrhd), - InvalidDrhd(usize), - Rmrr(&'static DmarRmrr), - InvalidRmrr(usize), - Atsr(&'static DmarAtsr), - InvalidAtsr(usize), - Rhsa(&'static DmarRhsa), - InvalidRhsa(usize), - Andd(&'static DmarAndd), - InvalidAndd(usize), - Unknown(u16) -} - -pub struct DmarIter { - sdt: &'static Sdt, - i: usize -} - -impl Iterator for DmarIter { - type Item = DmarEntry; - fn next(&mut self) -> Option { - if self.i + 4 <= self.sdt.data_len() { - let entry_type = unsafe { *((self.sdt.data_address() as *const u8).offset(self.i as isize) as *const u16) }; - let entry_len = unsafe { *((self.sdt.data_address() as *const u8).offset(self.i as isize + 2) as *const u16) } as usize; - - if self.i + entry_len <= self.sdt.data_len() { - let item = match entry_type { - 0 => if entry_len >= mem::size_of::() { - DmarEntry::Drhd(unsafe { &*((self.sdt.data_address() + self.i) as *const DmarDrhd) }) - } else { - DmarEntry::InvalidDrhd(entry_len) - }, - 1 => if entry_len >= mem::size_of::() { - DmarEntry::Rmrr(unsafe { &*((self.sdt.data_address() + self.i) as *const DmarRmrr) }) - } else { - DmarEntry::InvalidRmrr(entry_len) - }, - 2 => if entry_len >= mem::size_of::() { - DmarEntry::Atsr(unsafe { &*((self.sdt.data_address() + self.i) as *const DmarAtsr) }) - } else { - DmarEntry::InvalidAtsr(entry_len) - }, - 3 => if entry_len == mem::size_of::() { - DmarEntry::Rhsa(unsafe { &*((self.sdt.data_address() + self.i) as *const DmarRhsa) }) - } else { - DmarEntry::InvalidRhsa(entry_len) - }, - 4 => if entry_len >= mem::size_of::() { - DmarEntry::Andd(unsafe { &*((self.sdt.data_address() + self.i) as *const DmarAndd) }) - } else { - DmarEntry::InvalidAndd(entry_len) - }, - _ => DmarEntry::Unknown(entry_type) - }; - - self.i += entry_len; - - Some(item) - } else { - None - } - } else { - None - } - } -} diff --git a/src/acpi/fadt.rs b/src/acpi/fadt.rs deleted file mode 100644 index c7be82ab..00000000 --- a/src/acpi/fadt.rs +++ /dev/null @@ -1,124 +0,0 @@ -use core::{mem, ptr}; - -use super::sdt::Sdt; -use super::{ACPI_TABLE, SDT_POINTERS, get_sdt, find_sdt, get_sdt_signature, load_table}; - -use paging::ActivePageTable; - -#[repr(packed)] -#[derive(Debug)] -pub struct Fadt { - pub header: Sdt, - pub firmware_ctrl: u32, - pub dsdt: u32, - - // field used in ACPI 1.0; no longer in use, for compatibility only - reserved: u8, - - pub preferred_power_managament: u8, - pub sci_interrupt: u16, - pub smi_command_port: u32, - pub acpi_enable: u8, - pub acpi_disable: u8, - pub s4_bios_req: u8, - pub pstate_control: u8, - pub pm1a_event_block: u32, - pub pm1b_event_block: u32, - pub pm1a_control_block: u32, - pub pm1b_control_block: u32, - pub pm2_control_block: u32, - pub pm_timer_block: u32, - pub gpe0_block: u32, - pub gpe1_block: u32, - pub pm1_event_length: u8, - pub pm1_control_length: u8, - pub pm2_control_length: u8, - pub pm_timer_length: u8, - pub gpe0_ength: u8, - pub gpe1_length: u8, - pub gpe1_base: u8, - pub c_state_control: u8, - pub worst_c2_latency: u16, - pub worst_c3_latency: u16, - pub flush_size: u16, - pub flush_stride: u16, - pub duty_offset: u8, - pub duty_width: u8, - pub day_alarm: u8, - pub month_alarm: u8, 
- pub century: u8, - - // reserved in ACPI 1.0; used since ACPI 2.0+ - pub boot_architecture_flags: u16, - - reserved2: u8, - pub flags: u32, -} - -/* ACPI 2 structure -#[repr(packed)] -#[derive(Clone, Copy, Debug, Default)] -pub struct GenericAddressStructure { - address_space: u8, - bit_width: u8, - bit_offset: u8, - access_size: u8, - address: u64, -} - -{ - // 12 byte structure; see below for details - pub reset_reg: GenericAddressStructure, - - pub reset_value: u8, - reserved3: [u8; 3], - - // 64bit pointers - Available on ACPI 2.0+ - pub x_firmware_control: u64, - pub x_dsdt: u64, - - pub x_pm1a_event_block: GenericAddressStructure, - pub x_pm1b_event_block: GenericAddressStructure, - pub x_pm1a_control_block: GenericAddressStructure, - pub x_pm1b_control_block: GenericAddressStructure, - pub x_pm2_control_block: GenericAddressStructure, - pub x_pm_timer_block: GenericAddressStructure, - pub x_gpe0_block: GenericAddressStructure, - pub x_gpe1_block: GenericAddressStructure, -} -*/ - -impl Fadt { - pub fn new(sdt: &'static Sdt) -> Option { - if &sdt.signature == b"FACP" && sdt.length as usize >= mem::size_of::() { - Some(unsafe { ptr::read((sdt as *const Sdt) as *const Fadt) }) - } else { - None - } - } - - pub fn init(active_table: &mut ActivePageTable) { - let fadt_sdt = find_sdt("FACP"); - let fadt = if fadt_sdt.len() == 1 { - load_table(get_sdt_signature(fadt_sdt[0])); - Fadt::new(fadt_sdt[0]) - } else { - println!("Unable to find FADT"); - return; - }; - - if let Some(fadt) = fadt { - println!(" FACP: {:X}", fadt.dsdt); - - let dsdt_sdt = get_sdt(fadt.dsdt as usize, active_table); - - let signature = get_sdt_signature(dsdt_sdt); - if let Some(ref mut ptrs) = *(SDT_POINTERS.write()) { - ptrs.insert(signature, dsdt_sdt); - } - - let mut fadt_t = ACPI_TABLE.fadt.write(); - *fadt_t = Some(fadt); - } - } -} diff --git a/src/acpi/gtdt.rs b/src/acpi/gtdt.rs new file mode 100644 index 00000000..218c942c --- /dev/null +++ b/src/acpi/gtdt.rs @@ -0,0 +1,68 @@ +use alloc::boxed::Box; +use core::mem; + +use super::{find_sdt, sdt::Sdt}; +use crate::{ + device::generic_timer::GenericTimer, + dtb::irqchip::{register_irq, IRQ_CHIP}, +}; + +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct Gtdt { + pub header: Sdt, + pub cnt_control_base: u64, + _reserved: u32, + pub secure_el1_timer_gsiv: u32, + pub secure_el1_timer_flags: u32, + pub non_secure_el1_timer_gsiv: u32, + pub non_secure_el1_timer_flags: u32, + pub virtual_el1_timer_gsiv: u32, + pub virtual_el1_timer_flags: u32, + pub el2_timer_gsiv: u32, + pub el2_timer_flags: u32, + pub cnt_read_base: u64, + pub platform_timer_count: u32, + pub platform_timer_offset: u32, + /*TODO: we don't need these yet, and they cause short tables to fail parsing + pub virtual_el2_timer_gsiv: u32, + pub virtual_el2_timer_flags: u32, + */ + //TODO: platform timer structure (at platform timer offset, with platform timer count) +} + +impl Gtdt { + pub fn init() { + let gtdt_sdt = find_sdt("GTDT"); + let gtdt = if gtdt_sdt.len() == 1 { + match Gtdt::new(gtdt_sdt[0]) { + Some(gtdt) => gtdt, + None => { + log::warn!("Failed to parse GTDT"); + return; + } + } + } else { + log::warn!("Unable to find GTDT"); + return; + }; + + let gsiv = gtdt.non_secure_el1_timer_gsiv; + log::info!("generic_timer gsiv = {}", gsiv); + let mut timer = GenericTimer { + clk_freq: 0, + reload_count: 0, + }; + timer.init(); + register_irq(gsiv, Box::new(timer)); + unsafe { IRQ_CHIP.irq_enable(gsiv as u32) }; + } + + pub fn new(sdt: &'static Sdt) -> Option<&'static Gtdt> { + if 
&sdt.signature == b"GTDT" && sdt.length as usize >= mem::size_of::() { + Some(unsafe { &*((sdt as *const Sdt) as *const Gtdt) }) + } else { + None + } + } +} diff --git a/src/acpi/hpet.rs b/src/acpi/hpet.rs index 73d15be3..c3c05c45 100644 --- a/src/acpi/hpet.rs +++ b/src/acpi/hpet.rs @@ -1,26 +1,13 @@ use core::{mem, ptr}; -use core::intrinsics::{volatile_load, volatile_store}; - -use memory::Frame; -use paging::{ActivePageTable, PhysicalAddress, Page, VirtualAddress}; -use paging::entry::EntryFlags; - -use super::sdt::Sdt; -use super::{ACPI_TABLE, find_sdt, load_table, get_sdt_signature}; - -#[repr(packed)] -#[derive(Clone, Copy, Debug, Default)] -pub struct GenericAddressStructure { - address_space: u8, - bit_width: u8, - bit_offset: u8, - access_size: u8, - pub address: u64, -} +use core::ptr::{read_volatile, write_volatile}; + +use crate::memory::{map_device_memory, PhysicalAddress, PAGE_SIZE}; -#[repr(packed)] -#[derive(Debug)] +use super::{find_sdt, sdt::Sdt, GenericAddressStructure, ACPI_TABLE}; + +#[repr(C, packed)] +#[derive(Clone, Copy, Debug)] pub struct Hpet { pub header: Sdt, @@ -32,15 +19,14 @@ pub struct Hpet { pub hpet_number: u8, pub min_periodic_clk_tick: u16, - pub oem_attribute: u8 + pub oem_attribute: u8, } impl Hpet { - pub fn init(active_table: &mut ActivePageTable) { + pub fn init() { let hpet_sdt = find_sdt("HPET"); let hpet = if hpet_sdt.len() == 1 { - load_table(get_sdt_signature(hpet_sdt[0])); - Hpet::new(hpet_sdt[0], active_table) + Hpet::new(hpet_sdt[0]) } else { println!("Unable to find HPET"); return; @@ -54,30 +40,82 @@ impl Hpet { } } - pub fn new(sdt: &'static Sdt, active_table: &mut ActivePageTable) -> Option { + pub fn new(sdt: &'static Sdt) -> Option { if &sdt.signature == b"HPET" && sdt.length as usize >= mem::size_of::() { let s = unsafe { ptr::read((sdt as *const Sdt) as *const Hpet) }; - unsafe { s.base_address.init(active_table) }; - Some(s) + if s.base_address.address_space == 0 { + unsafe { s.map() }; + Some(s) + } else { + log::warn!( + "HPET has unsupported address space {}", + s.base_address.address_space + ); + None + } } else { None } } } -impl GenericAddressStructure { - pub unsafe fn init(&self, active_table: &mut ActivePageTable) { - let page = Page::containing_address(VirtualAddress::new(self.address as usize)); - let frame = Frame::containing_address(PhysicalAddress::new(self.address as usize)); - let result = active_table.map_to(page, frame, EntryFlags::PRESENT | EntryFlags::WRITABLE | EntryFlags::NO_EXECUTE); - result.flush(active_table); +//TODO: x86 use assumes only one HPET and only one GenericAddressStructure +#[cfg(target_arch = "x86")] +impl Hpet { + pub unsafe fn map(&self) { + use crate::{ + memory::{Frame, KernelMapper}, + paging::{entry::EntryFlags, Page, VirtualAddress}, + }; + use rmm::PageFlags; + + let frame = Frame::containing(PhysicalAddress::new(self.base_address.address as usize)); + let page = Page::containing_address(VirtualAddress::new(crate::HPET_OFFSET)); + + KernelMapper::lock() + .get_mut() + .expect( + "KernelMapper locked re-entrant while mapping memory for GenericAddressStructure", + ) + .map_phys( + page.start_address(), + frame.base(), + PageFlags::new() + .write(true) + .custom_flag(EntryFlags::NO_CACHE.bits(), true), + ) + .expect("failed to map memory for GenericAddressStructure") + .flush(); + } + + pub unsafe fn read_u64(&self, offset: usize) -> u64 { + read_volatile((crate::HPET_OFFSET + offset) as *const u64) + } + + pub unsafe fn write_u64(&mut self, offset: usize, value: u64) { + 
write_volatile((crate::HPET_OFFSET + offset) as *mut u64, value); + } +} + +#[cfg(not(target_arch = "x86"))] +impl Hpet { + pub unsafe fn map(&self) { + map_device_memory( + PhysicalAddress::new(self.base_address.address as usize), + PAGE_SIZE, + ); } - pub unsafe fn read_u64(&self, offset: usize) -> u64{ - volatile_load((self.address as usize + offset) as *const u64) + pub unsafe fn read_u64(&self, offset: usize) -> u64 { + read_volatile( + (self.base_address.address as usize + offset + crate::PHYS_OFFSET) as *const u64, + ) } pub unsafe fn write_u64(&mut self, offset: usize, value: u64) { - volatile_store((self.address as usize + offset) as *mut u64, value); + write_volatile( + (self.base_address.address as usize + offset + crate::PHYS_OFFSET) as *mut u64, + value, + ); } } diff --git a/src/acpi/madt.rs b/src/acpi/madt.rs deleted file mode 100644 index e95c5193..00000000 --- a/src/acpi/madt.rs +++ /dev/null @@ -1,262 +0,0 @@ -use core::mem; - -use memory::{allocate_frames, Frame}; -use paging::{ActivePageTable, Page, PhysicalAddress, VirtualAddress}; -use paging::entry::EntryFlags; - -use super::sdt::Sdt; -use super::{AP_STARTUP, TRAMPOLINE, find_sdt, load_table, get_sdt_signature}; - -use core::intrinsics::{atomic_load, atomic_store}; -use core::sync::atomic::Ordering; - -use device::local_apic::LOCAL_APIC; -use interrupt; -use start::{kstart_ap, CPU_COUNT, AP_READY}; - -/// The Multiple APIC Descriptor Table -#[derive(Debug)] -pub struct Madt { - sdt: &'static Sdt, - pub local_address: u32, - pub flags: u32 -} - -impl Madt { - pub fn init(active_table: &mut ActivePageTable) { - let madt_sdt = find_sdt("APIC"); - let madt = if madt_sdt.len() == 1 { - load_table(get_sdt_signature(madt_sdt[0])); - Madt::new(madt_sdt[0]) - } else { - println!("Unable to find MADT"); - return; - }; - - if let Some(madt) = madt { - println!(" APIC: {:>08X}: {}", madt.local_address, madt.flags); - - let local_apic = unsafe { &mut LOCAL_APIC }; - let me = local_apic.id() as u8; - - if local_apic.x2 { - println!(" X2APIC {}", me); - } else { - println!(" XAPIC {}: {:>08X}", me, local_apic.address); - } - - if cfg!(feature = "multi_core") { - let trampoline_frame = Frame::containing_address(PhysicalAddress::new(TRAMPOLINE)); - let trampoline_page = Page::containing_address(VirtualAddress::new(TRAMPOLINE)); - - // Map trampoline - let result = active_table.map_to(trampoline_page, trampoline_frame, EntryFlags::PRESENT | EntryFlags::WRITABLE); - result.flush(active_table); - - for madt_entry in madt.iter() { - println!(" {:?}", madt_entry); - match madt_entry { - MadtEntry::LocalApic(ap_local_apic) => if ap_local_apic.id == me { - println!(" This is my local APIC"); - } else { - if ap_local_apic.flags & 1 == 1 { - // Increase CPU ID - CPU_COUNT.fetch_add(1, Ordering::SeqCst); - - // Allocate a stack - let stack_start = allocate_frames(64).expect("no more frames in acpi stack_start").start_address().get() + ::KERNEL_OFFSET; - let stack_end = stack_start + 64 * 4096; - - let ap_ready = TRAMPOLINE as *mut u64; - let ap_cpu_id = unsafe { ap_ready.offset(1) }; - let ap_page_table = unsafe { ap_ready.offset(2) }; - let ap_stack_start = unsafe { ap_ready.offset(3) }; - let ap_stack_end = unsafe { ap_ready.offset(4) }; - let ap_code = unsafe { ap_ready.offset(5) }; - - // Set the ap_ready to 0, volatile - unsafe { atomic_store(ap_ready, 0) }; - unsafe { atomic_store(ap_cpu_id, ap_local_apic.id as u64) }; - unsafe { atomic_store(ap_page_table, active_table.address() as u64) }; - unsafe { atomic_store(ap_stack_start, 
stack_start as u64) }; - unsafe { atomic_store(ap_stack_end, stack_end as u64) }; - unsafe { atomic_store(ap_code, kstart_ap as u64) }; - AP_READY.store(false, Ordering::SeqCst); - - print!(" AP {}:", ap_local_apic.id); - - // Send INIT IPI - { - let mut icr = 0x4500; - if local_apic.x2 { - icr |= (ap_local_apic.id as u64) << 32; - } else { - icr |= (ap_local_apic.id as u64) << 56; - } - print!(" IPI..."); - local_apic.set_icr(icr); - } - - // Send START IPI - { - //Start at 0x0800:0000 => 0x8000. Hopefully the bootloader code is still there - let ap_segment = (AP_STARTUP >> 12) & 0xFF; - let mut icr = 0x4600 | ap_segment as u64; - - if local_apic.x2 { - icr |= (ap_local_apic.id as u64) << 32; - } else { - icr |= (ap_local_apic.id as u64) << 56; - } - - print!(" SIPI..."); - local_apic.set_icr(icr); - } - - // Wait for trampoline ready - print!(" Wait..."); - while unsafe { atomic_load(ap_ready) } == 0 { - interrupt::pause(); - } - print!(" Trampoline..."); - while ! AP_READY.load(Ordering::SeqCst) { - interrupt::pause(); - } - println!(" Ready"); - - active_table.flush_all(); - } else { - println!(" CPU Disabled"); - } - }, - _ => () - } - } - - // Unmap trampoline - let (result, _frame) = active_table.unmap_return(trampoline_page, false); - result.flush(active_table); - } - } - } - - pub fn new(sdt: &'static Sdt) -> Option { - if &sdt.signature == b"APIC" && sdt.data_len() >= 8 { //Not valid if no local address and flags - let local_address = unsafe { *(sdt.data_address() as *const u32) }; - let flags = unsafe { *(sdt.data_address() as *const u32).offset(1) }; - - Some(Madt { - sdt: sdt, - local_address: local_address, - flags: flags - }) - } else { - None - } - } - - pub fn iter(&self) -> MadtIter { - MadtIter { - sdt: self.sdt, - i: 8 // Skip local controller address and flags - } - } -} - -/// - -/// MADT Local APIC -#[derive(Debug)] -#[repr(packed)] -pub struct MadtLocalApic { - /// Processor ID - pub processor: u8, - /// Local APIC ID - pub id: u8, - /// Flags. 
1 means that the processor is enabled - pub flags: u32 -} - -/// MADT I/O APIC -#[derive(Debug)] -#[repr(packed)] -pub struct MadtIoApic { - /// I/O APIC ID - pub id: u8, - /// reserved - reserved: u8, - /// I/O APIC address - pub address: u32, - /// Global system interrupt base - pub gsi_base: u32 -} - -/// MADT Interrupt Source Override -#[derive(Debug)] -#[repr(packed)] -pub struct MadtIntSrcOverride { - /// Bus Source - pub bus_source: u8, - /// IRQ Source - pub irq_source: u8, - /// Global system interrupt base - pub gsi_base: u32, - /// Flags - pub flags: u16 -} - -/// MADT Entries -#[derive(Debug)] -pub enum MadtEntry { - LocalApic(&'static MadtLocalApic), - InvalidLocalApic(usize), - IoApic(&'static MadtIoApic), - InvalidIoApic(usize), - IntSrcOverride(&'static MadtIntSrcOverride), - InvalidIntSrcOverride(usize), - Unknown(u8) -} - -pub struct MadtIter { - sdt: &'static Sdt, - i: usize -} - -impl Iterator for MadtIter { - type Item = MadtEntry; - fn next(&mut self) -> Option { - if self.i + 1 < self.sdt.data_len() { - let entry_type = unsafe { *(self.sdt.data_address() as *const u8).offset(self.i as isize) }; - let entry_len = unsafe { *(self.sdt.data_address() as *const u8).offset(self.i as isize + 1) } as usize; - - if self.i + entry_len <= self.sdt.data_len() { - let item = match entry_type { - 0 => if entry_len == mem::size_of::() + 2 { - MadtEntry::LocalApic(unsafe { &*((self.sdt.data_address() + self.i + 2) as *const MadtLocalApic) }) - } else { - MadtEntry::InvalidLocalApic(entry_len) - }, - 1 => if entry_len == mem::size_of::() + 2 { - MadtEntry::IoApic(unsafe { &*((self.sdt.data_address() + self.i + 2) as *const MadtIoApic) }) - } else { - MadtEntry::InvalidIoApic(entry_len) - }, - 2 => if entry_len == mem::size_of::() + 2 { - MadtEntry::IntSrcOverride(unsafe { &*((self.sdt.data_address() + self.i + 2) as *const MadtIntSrcOverride) }) - } else { - MadtEntry::InvalidIntSrcOverride(entry_len) - }, - _ => MadtEntry::Unknown(entry_type) - }; - - self.i += entry_len; - - Some(item) - } else { - None - } - } else { - None - } - } -} diff --git a/src/acpi/madt/arch/aarch64.rs b/src/acpi/madt/arch/aarch64.rs new file mode 100644 index 00000000..dc460b08 --- /dev/null +++ b/src/acpi/madt/arch/aarch64.rs @@ -0,0 +1,97 @@ +use alloc::{boxed::Box, vec::Vec}; + +use super::{Madt, MadtEntry}; +use crate::{ + device::irqchip::{ + gic::{GenericInterruptController, GicCpuIf, GicDistIf}, + gicv3::{GicV3, GicV3CpuIf}, + }, + dtb::irqchip::{IrqChipItem, IRQ_CHIP}, + memory::{map_device_memory, PhysicalAddress, PAGE_SIZE}, +}; + +pub(super) fn init(madt: Madt) { + let mut gicd_opt = None; + let mut giccs = Vec::new(); + for madt_entry in madt.iter() { + println!(" {:#x?}", madt_entry); + match madt_entry { + MadtEntry::Gicc(gicc) => { + giccs.push(gicc); + } + MadtEntry::Gicd(gicd) => { + if gicd_opt.is_some() { + log::warn!("Only one GICD should be present on a system, ignoring this one"); + } else { + gicd_opt = Some(gicd); + } + } + _ => {} + } + } + let Some(gicd) = gicd_opt else { + log::warn!("No GICD found"); + return; + }; + let mut gic_dist_if = GicDistIf::default(); + unsafe { + let phys = PhysicalAddress::new(gicd.physical_base_address as usize); + let virt = map_device_memory(phys, PAGE_SIZE); + gic_dist_if.init(virt.data()); + }; + log::info!("{:#x?}", gic_dist_if); + match gicd.gic_version { + 1 | 2 => { + for gicc in giccs { + let mut gic_cpu_if = GicCpuIf::default(); + unsafe { + let phys = PhysicalAddress::new(gicc.physical_base_address as usize); + let virt = 
map_device_memory(phys, PAGE_SIZE); + gic_cpu_if.init(virt.data()) + }; + log::info!("{:#x?}", gic_cpu_if); + let gic = GenericInterruptController { + gic_dist_if, + gic_cpu_if, + irq_range: (0, 0), + }; + let chip = IrqChipItem { + phandle: 0, + parents: Vec::new(), + children: Vec::new(), + ic: Box::new(gic), + }; + unsafe { IRQ_CHIP.irq_chip_list.chips.push(chip) }; + //TODO: support more GICCs + break; + } + } + 3 => { + for gicc in giccs { + let mut gic_cpu_if = GicV3CpuIf; + unsafe { gic_cpu_if.init() }; + log::info!("{:#x?}", gic_cpu_if); + let gic = GicV3 { + gic_dist_if, + gic_cpu_if, + //TODO: get GICRs + gicrs: Vec::new(), + irq_range: (0, 0), + }; + let chip = IrqChipItem { + phandle: 0, + parents: Vec::new(), + children: Vec::new(), + ic: Box::new(gic), + }; + unsafe { IRQ_CHIP.irq_chip_list.chips.push(chip) }; + //TODO: support more GICCs + break; + } + } + _ => { + log::warn!("unsupported GIC version {}", gicd.gic_version); + } + } + unsafe { IRQ_CHIP.init(None) }; +} diff --git a/src/acpi/madt/arch/other.rs b/src/acpi/madt/arch/other.rs new file mode 100644 index 00000000..cf9440be --- /dev/null +++ b/src/acpi/madt/arch/other.rs @@ -0,0 +1,9 @@ +use super::Madt; + +pub(super) fn init(madt: Madt) { + for madt_entry in madt.iter() { + println!(" {:#x?}", madt_entry); + } + + log::warn!("MADT not yet handled on this platform"); +} diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs new file mode 100644 index 00000000..a8121c68 --- /dev/null +++ b/src/acpi/madt/arch/x86.rs @@ -0,0 +1,160 @@ +use core::sync::atomic::{AtomicU8, Ordering}; + +use crate::{ + device::local_apic::the_local_apic, + interrupt, + memory::{allocate_p2frame, Frame, KernelMapper}, + paging::{Page, PageFlags, PhysicalAddress, RmmA, RmmArch, VirtualAddress, PAGE_SIZE}, + start::{kstart_ap, AP_READY, CPU_COUNT}, +}; + +use super::{Madt, MadtEntry}; + +const TRAMPOLINE: usize = 0x8000; +static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline")); + +pub(super) fn init(madt: Madt) { + let local_apic = unsafe { the_local_apic() }; + let me = local_apic.id() as u8; + + if local_apic.x2 { + println!(" X2APIC {}", me); + } else { + println!(" XAPIC {}: {:>08X}", me, local_apic.address); + } + + if cfg!(feature = "multi_core") { + // Map trampoline + let trampoline_frame = Frame::containing(PhysicalAddress::new(TRAMPOLINE)); + let trampoline_page = Page::containing_address(VirtualAddress::new(TRAMPOLINE)); + let (result, page_table_physaddr) = unsafe { + //TODO: do not have writable and executable! 
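+            // The trampoline has to sit at a fixed, 4 KiB-aligned physical address
+            // below 1 MiB: the startup IPI sent later encodes it as the 8-bit segment
+            // (TRAMPOLINE >> 12), and the APs begin executing there in real mode.
+            // The page is identity-mapped writable so TRAMPOLINE_DATA can be copied
+            // in, and executable so the APs can run that code.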
+ let mut mapper = KernelMapper::lock(); + + let result = mapper + .get_mut() + .expect("expected kernel page table not to be recursively locked while initializing MADT") + .map_phys(trampoline_page.start_address(), trampoline_frame.base(), PageFlags::new().execute(true).write(true)) + .expect("failed to map trampoline"); + + (result, mapper.table().phys().data()) + }; + result.flush(); + + // Write trampoline, make sure TRAMPOLINE page is free for use + for i in 0..TRAMPOLINE_DATA.len() { + unsafe { + (*((TRAMPOLINE as *mut u8).add(i) as *const AtomicU8)) + .store(TRAMPOLINE_DATA[i], Ordering::SeqCst); + } + } + + for madt_entry in madt.iter() { + println!(" {:x?}", madt_entry); + match madt_entry { + MadtEntry::LocalApic(ap_local_apic) => { + if ap_local_apic.id == me { + println!(" This is my local APIC"); + } else { + if ap_local_apic.flags & 1 == 1 { + // Increase CPU ID + CPU_COUNT.fetch_add(1, Ordering::SeqCst); + + // Allocate a stack + let stack_start = allocate_p2frame(4) + .expect("no more frames in acpi stack_start") + .base() + .data() + + crate::PHYS_OFFSET; + let stack_end = stack_start + (PAGE_SIZE << 4); + + let ap_ready = (TRAMPOLINE + 8) as *mut u64; + let ap_cpu_id = unsafe { ap_ready.add(1) }; + let ap_page_table = unsafe { ap_ready.add(2) }; + let ap_stack_start = unsafe { ap_ready.add(3) }; + let ap_stack_end = unsafe { ap_ready.add(4) }; + let ap_code = unsafe { ap_ready.add(5) }; + + // Set the ap_ready to 0, volatile + unsafe { + ap_ready.write(0); + ap_cpu_id.write(ap_local_apic.processor.into()); + ap_page_table.write(page_table_physaddr as u64); + ap_stack_start.write(stack_start as u64); + ap_stack_end.write(stack_end as u64); + ap_code.write(kstart_ap as u64); + + // TODO: Is this necessary (this fence)? + core::arch::asm!(""); + }; + AP_READY.store(false, Ordering::SeqCst); + + print!( + " AP {} APIC {}:", + ap_local_apic.processor, ap_local_apic.id + ); + + // Send INIT IPI + { + let mut icr = 0x4500; + if local_apic.x2 { + icr |= (ap_local_apic.id as u64) << 32; + } else { + icr |= (ap_local_apic.id as u64) << 56; + } + print!(" IPI..."); + local_apic.set_icr(icr); + } + + // Send START IPI + { + //Start at 0x0800:0000 => 0x8000. 
Hopefully the bootloader code is still there + let ap_segment = (TRAMPOLINE >> 12) & 0xFF; + let mut icr = 0x4600 | ap_segment as u64; + + if local_apic.x2 { + icr |= (ap_local_apic.id as u64) << 32; + } else { + icr |= (ap_local_apic.id as u64) << 56; + } + + print!(" SIPI..."); + local_apic.set_icr(icr); + } + + // Wait for trampoline ready + print!(" Wait..."); + while unsafe { (*ap_ready.cast::()).load(Ordering::SeqCst) } + == 0 + { + interrupt::pause(); + } + print!(" Trampoline..."); + while !AP_READY.load(Ordering::SeqCst) { + interrupt::pause(); + } + println!(" Ready"); + + unsafe { + RmmA::invalidate_all(); + } + } else { + println!(" CPU Disabled"); + } + } + } + _ => (), + } + } + + // Unmap trampoline + let (_frame, _, flush) = unsafe { + KernelMapper::lock() + .get_mut() + .expect("expected kernel page table not to be recursively locked while initializing MADT") + .unmap_phys(trampoline_page.start_address(), true) + .expect("failed to unmap trampoline page") + }; + flush.flush(); + } +} diff --git a/src/acpi/madt/mod.rs b/src/acpi/madt/mod.rs new file mode 100644 index 00000000..53e57f79 --- /dev/null +++ b/src/acpi/madt/mod.rs @@ -0,0 +1,244 @@ +use core::{cell::SyncUnsafeCell, mem}; + +use super::{find_sdt, sdt::Sdt}; + +/// The Multiple APIC Descriptor Table +#[derive(Clone, Copy, Debug)] +pub struct Madt { + sdt: &'static Sdt, + pub local_address: u32, + pub flags: u32, +} + +#[cfg(target_arch = "aarch64")] +#[path = "arch/aarch64.rs"] +mod arch; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[path = "arch/x86.rs"] +mod arch; + +#[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")))] +#[path = "arch/other.rs"] +mod arch; + +static MADT: SyncUnsafeCell> = SyncUnsafeCell::new(None); +pub fn madt() -> Option<&'static Madt> { + unsafe { &*MADT.get() }.as_ref() +} +pub const FLAG_PCAT: u32 = 1; + +impl Madt { + pub fn init() { + let madt_sdt = find_sdt("APIC"); + let madt = if madt_sdt.len() == 1 { + Madt::new(madt_sdt[0]) + } else { + println!("Unable to find MADT"); + return; + }; + + if let Some(madt) = madt { + // safe because no APs have been started yet. + unsafe { MADT.get().write(Some(madt)) }; + + println!(" APIC: {:>08X}: {}", madt.local_address, madt.flags); + + arch::init(madt); + } + } + + pub fn new(sdt: &'static Sdt) -> Option { + if &sdt.signature == b"APIC" && sdt.data_len() >= 8 { + //Not valid if no local address and flags + let local_address = unsafe { (sdt.data_address() as *const u32).read_unaligned() }; + let flags = unsafe { + (sdt.data_address() as *const u32) + .offset(1) + .read_unaligned() + }; + + Some(Madt { + sdt, + local_address, + flags, + }) + } else { + None + } + } + + pub fn iter(&self) -> MadtIter { + MadtIter { + sdt: self.sdt, + i: 8, // Skip local controller address and flags + } + } +} + +/// MADT Local APIC +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct MadtLocalApic { + /// Processor ID + pub processor: u8, + /// Local APIC ID + pub id: u8, + /// Flags. 
1 means that the processor is enabled + pub flags: u32, +} + +/// MADT I/O APIC +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct MadtIoApic { + /// I/O APIC ID + pub id: u8, + /// reserved + _reserved: u8, + /// I/O APIC address + pub address: u32, + /// Global system interrupt base + pub gsi_base: u32, +} + +/// MADT Interrupt Source Override +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct MadtIntSrcOverride { + /// Bus Source + pub bus_source: u8, + /// IRQ Source + pub irq_source: u8, + /// Global system interrupt base + pub gsi_base: u32, + /// Flags + pub flags: u16, +} + +/// MADT GICC +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct MadtGicc { + _reserved: u16, + pub cpu_interface_number: u32, + pub acpi_processor_uid: u32, + pub flags: u32, + pub parking_protocol_version: u32, + pub performance_interrupt_gsiv: u32, + pub parked_address: u64, + pub physical_base_address: u64, + pub gicv: u64, + pub gich: u64, + pub vgic_maintenance_interrupt: u32, + pub gicr_base_address: u64, + pub mpidr: u64, + pub processor_power_efficiency_class: u8, + _reserved2: u8, + pub spe_overflow_interrupt: u16, + //TODO: optional field introduced in ACPI 6.5: pub trbe_interrupt: u16, +} + +/// MADT GICD +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct MadtGicd { + _reserved: u16, + pub gic_id: u32, + pub physical_base_address: u64, + pub system_vector_base: u32, + pub gic_version: u8, + _reserved2: [u8; 3], +} + +/// MADT Entries +#[derive(Debug)] +pub enum MadtEntry { + LocalApic(&'static MadtLocalApic), + InvalidLocalApic(usize), + IoApic(&'static MadtIoApic), + InvalidIoApic(usize), + IntSrcOverride(&'static MadtIntSrcOverride), + InvalidIntSrcOverride(usize), + Gicc(&'static MadtGicc), + InvalidGicc(usize), + Gicd(&'static MadtGicd), + InvalidGicd(usize), + Unknown(u8), +} + +pub struct MadtIter { + sdt: &'static Sdt, + i: usize, +} + +impl Iterator for MadtIter { + type Item = MadtEntry; + fn next(&mut self) -> Option { + if self.i + 1 < self.sdt.data_len() { + let entry_type = unsafe { *(self.sdt.data_address() as *const u8).add(self.i) }; + let entry_len = + unsafe { *(self.sdt.data_address() as *const u8).add(self.i + 1) } as usize; + + if self.i + entry_len <= self.sdt.data_len() { + let item = match entry_type { + 0x0 => { + if entry_len == mem::size_of::() + 2 { + MadtEntry::LocalApic(unsafe { + &*((self.sdt.data_address() + self.i + 2) as *const MadtLocalApic) + }) + } else { + MadtEntry::InvalidLocalApic(entry_len) + } + } + 0x1 => { + if entry_len == mem::size_of::() + 2 { + MadtEntry::IoApic(unsafe { + &*((self.sdt.data_address() + self.i + 2) as *const MadtIoApic) + }) + } else { + MadtEntry::InvalidIoApic(entry_len) + } + } + 0x2 => { + if entry_len == mem::size_of::() + 2 { + MadtEntry::IntSrcOverride(unsafe { + &*((self.sdt.data_address() + self.i + 2) + as *const MadtIntSrcOverride) + }) + } else { + MadtEntry::InvalidIntSrcOverride(entry_len) + } + } + 0xB => { + if entry_len >= mem::size_of::() + 2 { + MadtEntry::Gicc(unsafe { + &*((self.sdt.data_address() + self.i + 2) as *const MadtGicc) + }) + } else { + MadtEntry::InvalidGicc(entry_len) + } + } + 0xC => { + if entry_len >= mem::size_of::() + 2 { + MadtEntry::Gicd(unsafe { + &*((self.sdt.data_address() + self.i + 2) as *const MadtGicd) + }) + } else { + MadtEntry::InvalidGicd(entry_len) + } + } + _ => MadtEntry::Unknown(entry_type), + }; + + self.i += entry_len; + + Some(item) + } else { + None + } + } else { + None + } + } +} diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs 
index 8939938d..465f7e79 100644 --- a/src/acpi/mod.rs +++ b/src/acpi/mod.rs @@ -1,152 +1,155 @@ //! # ACPI //! Code to parse the ACPI tables -use alloc::btree_map::BTreeMap; -use alloc::string::String; -use alloc::vec::Vec; -use alloc::boxed::Box; +use alloc::{boxed::Box, string::String, vec::Vec}; -use syscall::io::{Io, Pio}; +use hashbrown::HashMap; +use spin::{Once, RwLock}; -use spin::RwLock; +use log::info; -use stop::kstop; - -use memory::Frame; -use paging::{ActivePageTable, Page, PhysicalAddress, VirtualAddress}; -use paging::entry::EntryFlags; - -use self::dmar::Dmar; -use self::fadt::Fadt; -use self::madt::Madt; -use self::rsdt::Rsdt; -use self::sdt::Sdt; -use self::xsdt::Xsdt; -use self::hpet::Hpet; -use self::rxsdt::Rxsdt; -use self::rsdp::RSDP; +use crate::{ + memory::KernelMapper, + paging::{PageFlags, PhysicalAddress, RmmA, RmmArch}, +}; -use self::aml::{parse_aml_table, AmlError, AmlValue}; +use self::{hpet::Hpet, madt::Madt, rsdp::RSDP, rsdt::Rsdt, rxsdt::Rxsdt, sdt::Sdt, xsdt::Xsdt}; +#[cfg(target_arch = "aarch64")] +mod gtdt; pub mod hpet; -mod dmar; -mod fadt; -mod madt; +pub mod madt; +mod rsdp; mod rsdt; -mod sdt; -mod xsdt; -mod aml; mod rxsdt; -mod rsdp; - -const TRAMPOLINE: usize = 0x7E00; -const AP_STARTUP: usize = TRAMPOLINE + 512; +pub mod sdt; +#[cfg(target_arch = "aarch64")] +mod spcr; +mod xsdt; -fn get_sdt(sdt_address: usize, active_table: &mut ActivePageTable) -> &'static Sdt { - { - let page = Page::containing_address(VirtualAddress::new(sdt_address)); - if active_table.translate_page(page).is_none() { - let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().get())); - let result = active_table.map_to(page, frame, EntryFlags::PRESENT | EntryFlags::NO_EXECUTE); - result.flush(active_table); - } +unsafe fn map_linearly(addr: PhysicalAddress, len: usize, mapper: &mut crate::paging::PageMapper) { + let base = PhysicalAddress::new(crate::paging::round_down_pages(addr.data())); + let aligned_len = crate::paging::round_up_pages(len + (addr.data() - base.data())); + + for page_idx in 0..aligned_len / crate::memory::PAGE_SIZE { + let (_, flush) = mapper + .map_linearly( + base.add(page_idx * crate::memory::PAGE_SIZE), + PageFlags::new(), + ) + .expect("failed to linearly map SDT"); + flush.flush(); } +} - let sdt = unsafe { &*(sdt_address as *const Sdt) }; +pub fn get_sdt(sdt_address: usize, mapper: &mut KernelMapper) -> &'static Sdt { + let mapper = mapper + .get_mut() + .expect("KernelMapper mapper locked re-entrant in get_sdt"); - // Map extra SDT frames if required - { - let start_page = Page::containing_address(VirtualAddress::new(sdt_address + 4096)); - let end_page = Page::containing_address(VirtualAddress::new(sdt_address + sdt.length as usize)); - for page in Page::range_inclusive(start_page, end_page) { - if active_table.translate_page(page).is_none() { - let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().get())); - let result = active_table.map_to(page, frame, EntryFlags::PRESENT | EntryFlags::NO_EXECUTE); - result.flush(active_table); - } - } - } + let physaddr = PhysicalAddress::new(sdt_address); - sdt -} + let sdt; -fn init_aml_table(sdt: &'static Sdt) { - match parse_aml_table(sdt) { - Ok(_) => println!(": Parsed"), - Err(AmlError::AmlParseError(e)) => println!(": {}", e), - Err(AmlError::AmlInvalidOpCode) => println!(": Invalid opcode"), - Err(AmlError::AmlValueError) => println!(": Type constraints or value bounds not met"), - Err(AmlError::AmlDeferredLoad) => println!(": Deferred load reached 
top level"), - Err(AmlError::AmlFatalError(_, _, _)) => { - println!(": Fatal error occurred"); - unsafe { kstop(); } - }, - Err(AmlError::AmlHardFatal) => { - println!(": Fatal error occurred"); - unsafe { kstop(); } - } - } -} + unsafe { + const SDT_SIZE: usize = core::mem::size_of::(); + map_linearly(physaddr, SDT_SIZE, mapper); -fn init_namespace() { - { - let mut namespace = ACPI_TABLE.namespace.write(); - *namespace = Some(BTreeMap::new()); - } + sdt = &*(RmmA::phys_to_virt(physaddr).data() as *const Sdt); - let dsdt = find_sdt("DSDT"); - if dsdt.len() == 1 { - print!(" DSDT"); - load_table(get_sdt_signature(dsdt[0])); - init_aml_table(dsdt[0]); - } else { - println!("Unable to find DSDT"); - return; - }; + map_linearly( + physaddr.add(SDT_SIZE), + sdt.length as usize - SDT_SIZE, + mapper, + ); + } + sdt +} - let ssdts = find_sdt("SSDT"); +#[repr(C, packed)] +#[derive(Clone, Copy, Debug, Default)] +pub struct GenericAddressStructure { + pub address_space: u8, + pub bit_width: u8, + pub bit_offset: u8, + pub access_size: u8, + pub address: u64, +} - for ssdt in ssdts { - print!(" SSDT"); - load_table(get_sdt_signature(ssdt)); - init_aml_table(ssdt); +pub enum RxsdtEnum { + Rsdt(Rsdt), + Xsdt(Xsdt), +} +impl Rxsdt for RxsdtEnum { + fn iter(&self) -> Box> { + match self { + Self::Rsdt(rsdt) => ::iter(rsdt), + Self::Xsdt(xsdt) => ::iter(xsdt), + } } } +pub static RXSDT_ENUM: Once = Once::new(); + /// Parse the ACPI tables to gather CPU, interrupt, and timer information -pub unsafe fn init(active_table: &mut ActivePageTable) { +pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) { { let mut sdt_ptrs = SDT_POINTERS.write(); - *sdt_ptrs = Some(BTreeMap::new()); - } - - { - let mut order = SDT_ORDER.write(); - *order = Some(vec!()); + *sdt_ptrs = Some(HashMap::new()); } // Search for RSDP - if let Some(rsdp) = RSDP::get_rsdp(active_table) { - let rxsdt = get_sdt(rsdp.sdt_address(), active_table); + let rsdp_opt = RSDP::get_rsdp(&mut KernelMapper::lock(), already_supplied_rsdp); + + if let Some(rsdp) = rsdp_opt { + info!("RSDP: {:?}", rsdp); + let rxsdt = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock()); for &c in rxsdt.signature.iter() { print!("{}", c as char); } println!(":"); - let rxsdt: Box = if let Some(rsdt) = Rsdt::new(rxsdt) { - Box::new(rsdt) + let rxsdt = if let Some(rsdt) = Rsdt::new(rxsdt) { + let mut initialized = false; + + let rsdt = RXSDT_ENUM.call_once(|| { + initialized = true; + + RxsdtEnum::Rsdt(rsdt) + }); + + if !initialized { + log::error!("RXSDT_ENUM already initialized"); + } + + rsdt } else if let Some(xsdt) = Xsdt::new(rxsdt) { - Box::new(xsdt) + let mut initialized = false; + + let xsdt = RXSDT_ENUM.call_once(|| { + initialized = true; + + RxsdtEnum::Xsdt(xsdt) + }); + if !initialized { + log::error!("RXSDT_ENUM already initialized"); + } + + xsdt } else { println!("UNKNOWN RSDT OR XSDT SIGNATURE"); return; }; - rxsdt.map_all(active_table); + // TODO: Don't touch ACPI tables in kernel? 
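+        // First pass: map every table listed by the RSDT/XSDT, so the second pass
+        // below can read each SDT header through the PHYS_OFFSET linear mapping
+        // without touching the page tables again.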
+ + for sdt in rxsdt.iter() { + get_sdt(sdt, &mut KernelMapper::lock()); + } for sdt_address in rxsdt.iter() { - let sdt = &*(sdt_address as *const Sdt); + let sdt = &*((sdt_address + crate::PHYS_OFFSET) as *const Sdt); let signature = get_sdt_signature(sdt); if let Some(ref mut ptrs) = *(SDT_POINTERS.write()) { @@ -154,50 +157,27 @@ pub unsafe fn init(active_table: &mut ActivePageTable) { } } - Fadt::init(active_table); - Madt::init(active_table); - Dmar::init(active_table); - Hpet::init(active_table); - init_namespace(); + //TODO: support this on any arch + #[cfg(target_arch = "aarch64")] + spcr::Spcr::init(); + // TODO: Enumerate processors in userspace, and then provide an ACPI-independent interface + // to initialize enumerated processors to userspace? + Madt::init(); + // TODO: Let userspace setup HPET, and then provide an interface to specify which timer to + // use? + Hpet::init(); + #[cfg(target_arch = "aarch64")] + gtdt::Gtdt::init(); } else { println!("NO RSDP FOUND"); } } -pub fn set_global_s_state(state: u8) { - if state == 5 { - let fadt = ACPI_TABLE.fadt.read(); - - if let Some(ref fadt) = *fadt { - let port = fadt.pm1a_control_block as u16; - let mut val = 1 << 13; - - let namespace = ACPI_TABLE.namespace.read(); - - if let Some(ref namespace) = *namespace { - if let Some(s) = namespace.get("\\_S5") { - if let Ok(p) = s.get_as_package() { - let slp_typa = p[0].get_as_integer().expect("SLP_TYPa is not an integer"); - let slp_typb = p[1].get_as_integer().expect("SLP_TYPb is not an integer"); - - println!("Shutdown SLP_TYPa {:X}, SLP_TYPb {:X}", slp_typa, slp_typb); - val |= slp_typa as u16; - - println!("Shutdown with ACPI outw(0x{:X}, 0x{:X})", port, val); - Pio::::new(port).write(val); - } - } - } - } - } -} - -type SdtSignature = (String, [u8; 6], [u8; 8]); -pub static SDT_POINTERS: RwLock>> = RwLock::new(None); -pub static SDT_ORDER: RwLock>> = RwLock::new(None); +pub type SdtSignature = (String, [u8; 6], [u8; 8]); +pub static SDT_POINTERS: RwLock>> = RwLock::new(None); pub fn find_sdt(name: &str) -> Vec<&'static Sdt> { - let mut sdts: Vec<&'static Sdt> = vec!(); + let mut sdts: Vec<&'static Sdt> = vec![]; if let Some(ref ptrs) = *(SDT_POINTERS.read()) { for (signature, sdt) in ptrs { @@ -211,55 +191,17 @@ pub fn find_sdt(name: &str) -> Vec<&'static Sdt> { } pub fn get_sdt_signature(sdt: &'static Sdt) -> SdtSignature { - let signature = String::from_utf8(sdt.signature.to_vec()).expect("Error converting signature to string"); + let signature = + String::from_utf8(sdt.signature.to_vec()).expect("Error converting signature to string"); (signature, sdt.oem_id, sdt.oem_table_id) } -pub fn load_table(signature: SdtSignature) { - let mut order = SDT_ORDER.write(); - - if let Some(ref mut o) = *order { - o.push(signature); - } -} - -pub fn get_signature_from_index(index: usize) -> Option { - if let Some(ref order) = *(SDT_ORDER.read()) { - if index < order.len() { - Some(order[index].clone()) - } else { - None - } - } else { - None - } -} - -pub fn get_index_from_signature(signature: SdtSignature) -> Option { - if let Some(ref order) = *(SDT_ORDER.read()) { - let mut i = order.len(); - while i > 0 { - i -= 1; - - if order[i] == signature { - return Some(i); - } - } - } - - None -} - pub struct Acpi { - pub fadt: RwLock>, - pub namespace: RwLock>>, pub hpet: RwLock>, pub next_ctx: RwLock, } pub static ACPI_TABLE: Acpi = Acpi { - fadt: RwLock::new(None), - namespace: RwLock::new(None), hpet: RwLock::new(None), next_ctx: RwLock::new(0), }; diff --git a/src/acpi/rsdp.rs 
b/src/acpi/rsdp.rs index 1ec35500..1706e1b0 100644 --- a/src/acpi/rsdp.rs +++ b/src/acpi/rsdp.rs @@ -1,36 +1,57 @@ -use memory::Frame; -use paging::{ActivePageTable, Page, PhysicalAddress, VirtualAddress}; -use paging::entry::EntryFlags; +use crate::{ + memory::{Frame, KernelMapper}, + paging::{Page, PageFlags, PhysicalAddress, VirtualAddress}, +}; /// RSDP #[derive(Copy, Clone, Debug)] -#[repr(packed)] +#[repr(C, packed)] pub struct RSDP { signature: [u8; 8], - checksum: u8, - oemid: [u8; 6], + _checksum: u8, + _oemid: [u8; 6], revision: u8, rsdt_address: u32, - length: u32, + _length: u32, xsdt_address: u64, - extended_checksum: u8, - reserved: [u8; 3] + _extended_checksum: u8, + _reserved: [u8; 3], } impl RSDP { + fn get_already_supplied_rsdp(rsdp_ptr: *const u8) -> RSDP { + // TODO: Validate + unsafe { *(rsdp_ptr as *const RSDP) } + } + pub fn get_rsdp( + mapper: &mut KernelMapper, + already_supplied_rsdp: Option<*const u8>, + ) -> Option { + if let Some(rsdp_ptr) = already_supplied_rsdp { + Some(Self::get_already_supplied_rsdp(rsdp_ptr)) + } else { + Self::get_rsdp_by_searching(mapper) + } + } /// Search for the RSDP - pub fn get_rsdp(active_table: &mut ActivePageTable) -> Option { + pub fn get_rsdp_by_searching(mapper: &mut KernelMapper) -> Option { let start_addr = 0xE_0000; let end_addr = 0xF_FFFF; // Map all of the ACPI RSDP space { - let start_frame = Frame::containing_address(PhysicalAddress::new(start_addr)); - let end_frame = Frame::containing_address(PhysicalAddress::new(end_addr)); + let start_frame = Frame::containing(PhysicalAddress::new(start_addr)); + let end_frame = Frame::containing(PhysicalAddress::new(end_addr)); for frame in Frame::range_inclusive(start_frame, end_frame) { - let page = Page::containing_address(VirtualAddress::new(frame.start_address().get())); - let result = active_table.map_to(page, frame, EntryFlags::PRESENT | EntryFlags::NO_EXECUTE); - result.flush(active_table); + let page = Page::containing_address(VirtualAddress::new(frame.base().data())); + let result = unsafe { + mapper + .get_mut() + .expect("KernelMapper locked re-entrant while locating RSDPs") + .map_phys(page.start_address(), frame.base(), PageFlags::new()) + .expect("failed to map page while searching for RSDP") + }; + result.flush(); } } @@ -38,7 +59,7 @@ impl RSDP { } fn search(start_addr: usize, end_addr: usize) -> Option { - for i in 0 .. 
(end_addr + 1 - start_addr)/16 { + for i in 0..(end_addr + 1 - start_addr) / 16 { let rsdp = unsafe { &*((start_addr + i * 16) as *const RSDP) }; if &rsdp.signature == b"RSD PTR " { return Some(*rsdp); diff --git a/src/acpi/rsdt.rs b/src/acpi/rsdt.rs index 7877a061..ce996d89 100644 --- a/src/acpi/rsdt.rs +++ b/src/acpi/rsdt.rs @@ -1,8 +1,7 @@ -use core::mem; use alloc::boxed::Box; +use core::{convert::TryFrom, mem}; -use super::sdt::Sdt; -use super::rxsdt::Rxsdt; +use super::{rxsdt::Rxsdt, sdt::Sdt}; #[derive(Debug)] pub struct Rsdt(&'static Sdt); @@ -15,27 +14,34 @@ impl Rsdt { None } } + pub fn as_slice(&self) -> &[u8] { + let length = + usize::try_from(self.0.length).expect("expected 32-bit length to fit within usize"); + + unsafe { core::slice::from_raw_parts(self.0 as *const _ as *const u8, length) } + } } impl Rxsdt for Rsdt { - fn iter(&self) -> Box> { - Box::new(RsdtIter { - sdt: self.0, - i: 0 - }) + fn iter(&self) -> Box> { + Box::new(RsdtIter { sdt: self.0, i: 0 }) } } pub struct RsdtIter { sdt: &'static Sdt, - i: usize + i: usize, } impl Iterator for RsdtIter { type Item = usize; fn next(&mut self) -> Option { - if self.i < self.sdt.data_len()/mem::size_of::() { - let item = unsafe { *(self.sdt.data_address() as *const u32).offset(self.i as isize) }; + if self.i < self.sdt.data_len() / mem::size_of::() { + let item = unsafe { + (self.sdt.data_address() as *const u32) + .add(self.i) + .read_unaligned() + }; self.i += 1; Some(item as usize) } else { diff --git a/src/acpi/rxsdt.rs b/src/acpi/rxsdt.rs index d3cf3542..f1803884 100644 --- a/src/acpi/rxsdt.rs +++ b/src/acpi/rxsdt.rs @@ -1,28 +1,5 @@ use alloc::boxed::Box; -use paging::ActivePageTable; - -use super::sdt::Sdt; -use super::get_sdt; - pub trait Rxsdt { - fn iter(&self) -> Box>; - - fn map_all(&self, active_table: &mut ActivePageTable) { - for sdt in self.iter() { - get_sdt(sdt, active_table); - } - } - - fn find(&self, signature: [u8; 4], oem_id: [u8; 6], oem_table_id: [u8; 8]) -> Option<&'static Sdt> { - for sdt in self.iter() { - let sdt = unsafe { &*(sdt as *const Sdt) }; - - if sdt.match_pattern(signature, oem_id, oem_table_id) { - return Some(sdt); - } - } - - None - } + fn iter(&self) -> Box>; } diff --git a/src/acpi/sdt.rs b/src/acpi/sdt.rs index ea856272..6c7bd6cd 100644 --- a/src/acpi/sdt.rs +++ b/src/acpi/sdt.rs @@ -1,18 +1,17 @@ use core::mem; -use core::slice; #[derive(Copy, Clone, Debug)] -#[repr(packed)] +#[repr(C, packed)] pub struct Sdt { - pub signature: [u8; 4], - pub length: u32, - pub revision: u8, - pub checksum: u8, - pub oem_id: [u8; 6], - pub oem_table_id: [u8; 8], - pub oem_revision: u32, - pub creator_id: u32, - pub creator_revision: u32 + pub signature: [u8; 4], + pub length: u32, + pub revision: u8, + pub checksum: u8, + pub oem_id: [u8; 6], + pub oem_table_id: [u8; 8], + pub oem_revision: u32, + pub creator_id: u32, + pub creator_revision: u32, } impl Sdt { @@ -31,12 +30,4 @@ impl Sdt { 0 } } - - pub fn data(&self) -> &[u8] { - unsafe { slice::from_raw_parts(self.data_address() as *const u8, self.data_len()) } - } - - pub fn match_pattern(&self, signature: [u8; 4], oem_id: [u8; 6], oem_table_id: [u8; 8]) -> bool{ - self.signature == signature && self.oem_id == oem_id && self.oem_table_id == oem_table_id - } } diff --git a/src/acpi/spcr.rs b/src/acpi/spcr.rs new file mode 100644 index 00000000..51cf1793 --- /dev/null +++ b/src/acpi/spcr.rs @@ -0,0 +1,117 @@ +use core::mem; + +use super::{find_sdt, sdt::Sdt, GenericAddressStructure}; +use crate::{ + device::{ + serial::{SerialKind, COM1}, + 
uart_pl011,
+    },
+    memory::{map_device_memory, PhysicalAddress, PAGE_SIZE},
+};
+
+#[derive(Clone, Copy, Debug)]
+#[repr(C, packed)]
+pub struct Spcr {
+    pub header: Sdt,
+    pub interface_type: u8,
+    _reserved: [u8; 3],
+    pub base_address: GenericAddressStructure,
+    pub interrupt_type: u8,
+    pub irq: u8,
+    pub gsiv: u32,
+    pub configured_baud_rate: u8,
+    pub parity: u8,
+    pub stop_bits: u8,
+    pub flow_control: u8,
+    pub terminal_type: u8,
+    pub language: u8,
+    pub pci_device_id: u16,
+    pub pci_vendor_id: u16,
+    pub pci_bus: u8,
+    pub pci_device: u8,
+    pub pci_function: u8,
+    pub pci_flags: u32,
+    pub pci_segment: u8,
+    /*TODO: these fields are optional based on the table revision
+    pub uart_clock_frequency: u32,
+    pub precise_baud_rate: u32,
+    pub namespace_string_length: u16,
+    pub namespace_string_offset: u16,
+    */
+    // namespace_string
+}
+
+impl Spcr {
+    pub fn init() {
+        let spcr_sdt = find_sdt("SPCR");
+        let spcr = if spcr_sdt.len() == 1 {
+            match Spcr::new(spcr_sdt[0]) {
+                Some(spcr) => spcr,
+                None => {
+                    log::warn!("Failed to parse SPCR");
+                    return;
+                }
+            }
+        } else {
+            log::warn!("Unable to find SPCR");
+            return;
+        };
+
+        if spcr.base_address.address == 0 {
+            // Serial disabled
+            return;
+        }
+
+        if spcr.header.revision >= 2 {
+            match spcr.interface_type {
+                3 => {
+                    // PL011
+                    if spcr.base_address.address_space == 0
+                        && spcr.base_address.bit_width == 32
+                        && spcr.base_address.bit_offset == 0
+                        && spcr.base_address.access_size == 3
+                    {
+                        let virt = unsafe {
+                            map_device_memory(
+                                PhysicalAddress::new(spcr.base_address.address as usize),
+                                PAGE_SIZE,
+                            )
+                        };
+                        let serial_port = uart_pl011::SerialPort::new(virt.data(), false);
+                        *COM1.lock() = Some(SerialKind::Pl011(serial_port))
+                    } else {
+                        log::warn!(
+                            "SPCR unsupported address for PL011 {:#x?}",
+                            spcr.base_address
+                        );
+                    }
+                }
+                //TODO: support more types!
+                unsupported => {
+                    log::warn!(
+                        "SPCR revision {} unsupported interface type {}",
+                        spcr.header.revision,
+                        unsupported
+                    );
+                }
+            }
+        } else if spcr.header.revision == 1 {
+            match spcr.interface_type {
+                //TODO: support more types!
+ unsupported => { + log::warn!("SPCR revision 1 unsupported interface type {}", unsupported); + } + } + } else { + log::warn!("SPCR unsupported revision {}", spcr.header.revision); + } + } + + pub fn new(sdt: &'static Sdt) -> Option<&'static Spcr> { + if &sdt.signature == b"SPCR" && sdt.length as usize >= mem::size_of::() { + Some(unsafe { &*((sdt as *const Sdt) as *const Spcr) }) + } else { + None + } + } +} diff --git a/src/acpi/xsdt.rs b/src/acpi/xsdt.rs index 7339ce01..5593250d 100644 --- a/src/acpi/xsdt.rs +++ b/src/acpi/xsdt.rs @@ -1,8 +1,7 @@ -use core::mem; use alloc::boxed::Box; +use core::{convert::TryFrom, mem}; -use super::sdt::Sdt; -use super::rxsdt::Rxsdt; +use super::{rxsdt::Rxsdt, sdt::Sdt}; #[derive(Debug)] pub struct Xsdt(&'static Sdt); @@ -15,27 +14,32 @@ impl Xsdt { None } } + pub fn as_slice(&self) -> &[u8] { + let length = + usize::try_from(self.0.length).expect("expected 32-bit length to fit within usize"); + + unsafe { core::slice::from_raw_parts(self.0 as *const _ as *const u8, length) } + } } impl Rxsdt for Xsdt { - fn iter(&self) -> Box> { - Box::new(XsdtIter { - sdt: self.0, - i: 0 - }) + fn iter(&self) -> Box> { + Box::new(XsdtIter { sdt: self.0, i: 0 }) } } pub struct XsdtIter { sdt: &'static Sdt, - i: usize + i: usize, } impl Iterator for XsdtIter { type Item = usize; fn next(&mut self) -> Option { - if self.i < self.sdt.data_len()/mem::size_of::() { - let item = unsafe { *(self.sdt.data_address() as *const u64).offset(self.i as isize) }; + if self.i < self.sdt.data_len() / mem::size_of::() { + let item = unsafe { + core::ptr::read_unaligned((self.sdt.data_address() as *const u64).add(self.i)) + }; self.i += 1; Some(item as usize) } else { diff --git a/src/allocator/linked_list.rs b/src/allocator/linked_list.rs index ddeadf51..b1875434 100644 --- a/src/allocator/linked_list.rs +++ b/src/allocator/linked_list.rs @@ -1,10 +1,11 @@ -use core::alloc::{AllocErr, GlobalAlloc, Layout}; -use core::ptr::NonNull; +use crate::memory::KernelMapper; +use core::{ + alloc::{GlobalAlloc, Layout}, + ptr::{self, NonNull}, +}; use linked_list_allocator::Heap; use spin::Mutex; -use crate::paging::ActivePageTable; - static HEAP: Mutex> = Mutex::new(None); pub struct Allocator; @@ -17,32 +18,25 @@ impl Allocator { unsafe impl GlobalAlloc for Allocator { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { - loop { - let res = if let Some(ref mut heap) = *HEAP.lock() { - heap.allocate_first_fit(layout) - } else { - panic!("__rust_allocate: heap not initialized"); - }; - - match res { - Err(AllocErr) => { - let size = if let Some(ref heap) = *HEAP.lock() { - heap.size() - } else { - panic!("__rust_allocate: heap not initialized"); - }; - - super::map_heap(&mut ActivePageTable::new(), crate::KERNEL_HEAP_OFFSET + size, crate::KERNEL_HEAP_SIZE); - - if let Some(ref mut heap) = *HEAP.lock() { - heap.extend(crate::KERNEL_HEAP_SIZE); - } else { - panic!("__rust_allocate: heap not initialized"); - } - }, - other => return other.ok().map_or(0 as *mut u8, |allocation| allocation.as_ptr()), + while let Some(ref mut heap) = *HEAP.lock() { + match heap.allocate_first_fit(layout) { + Err(()) => { + let size = heap.size(); + super::map_heap( + &mut KernelMapper::lock(), + crate::KERNEL_HEAP_OFFSET + size, + crate::KERNEL_HEAP_SIZE, + ); + heap.extend(crate::KERNEL_HEAP_SIZE); + } + other => { + return other + .ok() + .map_or(ptr::null_mut(), |allocation| allocation.as_ptr()) + } } } + panic!("__rust_allocate: heap not initialized"); } unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { diff 
--git a/src/allocator/mod.rs b/src/allocator/mod.rs index bed1b75d..36a84093 100644 --- a/src/allocator/mod.rs +++ b/src/allocator/mod.rs @@ -1,38 +1,50 @@ -use crate::paging::{ActivePageTable, Page, VirtualAddress}; -use crate::paging::entry::EntryFlags; -use crate::paging::mapper::MapperFlushAll; +use crate::{ + memory::KernelMapper, + paging::{mapper::PageFlushAll, Page, PageFlags, VirtualAddress}, +}; +use rmm::Flusher; -#[cfg(not(feature="slab"))] +#[cfg(not(feature = "slab"))] pub use self::linked_list::Allocator; -#[cfg(feature="slab")] +#[cfg(feature = "slab")] pub use self::slab::Allocator; -#[cfg(not(feature="slab"))] +#[cfg(not(feature = "slab"))] mod linked_list; -#[cfg(feature="slab")] +#[cfg(feature = "slab")] mod slab; -unsafe fn map_heap(active_table: &mut ActivePageTable, offset: usize, size: usize) { - let mut flush_all = MapperFlushAll::new(); +unsafe fn map_heap(mapper: &mut KernelMapper, offset: usize, size: usize) { + let mapper = mapper + .get_mut() + .expect("failed to obtain exclusive access to KernelMapper while extending heap"); + let mut flush_all = PageFlushAll::new(); let heap_start_page = Page::containing_address(VirtualAddress::new(offset)); - let heap_end_page = Page::containing_address(VirtualAddress::new(offset + size-1)); + let heap_end_page = Page::containing_address(VirtualAddress::new(offset + size - 1)); for page in Page::range_inclusive(heap_start_page, heap_end_page) { - let result = active_table.map(page, EntryFlags::PRESENT | EntryFlags::GLOBAL | EntryFlags::WRITABLE | EntryFlags::NO_EXECUTE); + let result = mapper + .map( + page.start_address(), + PageFlags::new() + .write(true) + .global(cfg!(not(feature = "pti"))), + ) + .expect("failed to map kernel heap"); flush_all.consume(result); } - flush_all.flush(active_table); + flush_all.flush(); } -pub unsafe fn init(active_table: &mut ActivePageTable) { +pub unsafe fn init() { let offset = crate::KERNEL_HEAP_OFFSET; let size = crate::KERNEL_HEAP_SIZE; // Map heap pages - map_heap(active_table, offset, size); + map_heap(&mut KernelMapper::lock(), offset, size); // Initialize global heap Allocator::init(offset, size); diff --git a/src/allocator/slab.rs b/src/allocator/slab.rs index 9ada9e68..2185645a 100644 --- a/src/allocator/slab.rs +++ b/src/allocator/slab.rs @@ -1,6 +1,6 @@ use core::alloc::{Alloc, AllocErr, Layout}; -use spin::Mutex; use slab_allocator::Heap; +use spin::Mutex; static HEAP: Mutex> = Mutex::new(None); diff --git a/src/arch/aarch64/consts.rs b/src/arch/aarch64/consts.rs new file mode 100644 index 00000000..4cdd0d87 --- /dev/null +++ b/src/arch/aarch64/consts.rs @@ -0,0 +1,41 @@ +#![allow(unused)] + +// Because the memory map is so important to not be aliased, it is defined here, in one place +// The lower 256 PML4 entries are reserved for userspace +// Each PML4 entry references up to 512 GB of memory +// The second from the top (510) PML4 is reserved for the kernel +/// The size of a single PML4 +pub const PML4_SIZE: usize = 0x0000_0080_0000_0000; +pub const PML4_MASK: usize = 0x0000_ff80_0000_0000; + +/// Offset of recursive paging (deprecated, but still reserved) +pub const RECURSIVE_PAGE_OFFSET: usize = (-(PML4_SIZE as isize)) as usize; +pub const RECURSIVE_PAGE_PML4: usize = (RECURSIVE_PAGE_OFFSET & PML4_MASK) / PML4_SIZE; + +/// Offset of kernel +pub const KERNEL_OFFSET: usize = RECURSIVE_PAGE_OFFSET - PML4_SIZE; +pub const KERNEL_PML4: usize = (KERNEL_OFFSET & PML4_MASK) / PML4_SIZE; + +/// Offset to kernel heap +pub const KERNEL_HEAP_OFFSET: usize = KERNEL_OFFSET - 
PML4_SIZE; +pub const KERNEL_HEAP_PML4: usize = (KERNEL_HEAP_OFFSET & PML4_MASK) / PML4_SIZE; +/// Size of kernel heap +pub const KERNEL_HEAP_SIZE: usize = 1 * 1024 * 1024; // 1 MB + +/// Offset of temporary mapping for misc kernel bring-up actions +pub const KERNEL_TMP_MISC_OFFSET: usize = KERNEL_HEAP_OFFSET - PML4_SIZE; + +/// Offset to kernel percpu variables +pub const KERNEL_PERCPU_OFFSET: usize = KERNEL_TMP_MISC_OFFSET - PML4_SIZE; +pub const KERNEL_PERCPU_PML4: usize = (KERNEL_PERCPU_OFFSET & PML4_MASK) / PML4_SIZE; +/// Size of kernel percpu variables +pub const KERNEL_PERCPU_SHIFT: u8 = 16; // 2^16 = 64 KiB +pub const KERNEL_PERCPU_SIZE: usize = 1_usize << KERNEL_PERCPU_SHIFT; + +/// Offset of physmap +// This needs to match RMM's PHYS_OFFSET +pub const PHYS_OFFSET: usize = 0xFFFF_8000_0000_0000; +pub const PHYS_PML4: usize = (PHYS_OFFSET & PML4_MASK) / PML4_SIZE; + +/// End offset of the user image, i.e. kernel start +pub const USER_END_OFFSET: usize = 256 * PML4_SIZE; diff --git a/src/arch/aarch64/debug.rs b/src/arch/aarch64/debug.rs new file mode 100644 index 00000000..3f526186 --- /dev/null +++ b/src/arch/aarch64/debug.rs @@ -0,0 +1,58 @@ +use core::fmt; +use spin::MutexGuard; + +use crate::log::{Log, LOG}; + +#[cfg(feature = "serial_debug")] +use super::device::serial::{SerialKind, COM1}; +#[cfg(feature = "graphical_debug")] +use crate::devices::graphical_debug::{DebugDisplay, DEBUG_DISPLAY}; + +pub struct Writer<'a> { + log: MutexGuard<'a, Option>, + #[cfg(feature = "graphical_debug")] + display: MutexGuard<'a, Option>, + #[cfg(feature = "serial_debug")] + serial: MutexGuard<'a, Option>, +} + +impl<'a> Writer<'a> { + pub fn new() -> Writer<'a> { + Writer { + log: LOG.lock(), + #[cfg(feature = "graphical_debug")] + display: DEBUG_DISPLAY.lock(), + #[cfg(feature = "serial_debug")] + serial: COM1.lock(), + } + } + + pub fn write(&mut self, buf: &[u8], preserve: bool) { + if preserve { + if let Some(ref mut log) = *self.log { + log.write(buf); + } + } + + #[cfg(feature = "graphical_debug")] + { + if let Some(ref mut display) = *self.display { + let _ = display.write(buf); + } + } + + #[cfg(feature = "serial_debug")] + { + if let Some(ref mut serial) = *self.serial { + serial.write(buf); + } + } + } +} + +impl<'a> fmt::Write for Writer<'a> { + fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> { + self.write(s.as_bytes(), true); + Ok(()) + } +} diff --git a/src/arch/aarch64/device/cpu/mod.rs b/src/arch/aarch64/device/cpu/mod.rs new file mode 100644 index 00000000..6d9ca8cd --- /dev/null +++ b/src/arch/aarch64/device/cpu/mod.rs @@ -0,0 +1,180 @@ +use core::fmt::{Result, Write}; + +use crate::device::cpu::registers::control_regs; + +pub mod registers; + +bitfield::bitfield! 
{ + pub struct MachineId(u32); + get_implementer, _: 31, 24; + get_variant, _: 23, 20; + get_architecture, _: 19, 16; + get_part_number, _: 15, 4; + get_revision, _: 3, 0; +} + +enum ImplementerID { + Unknown, + Arm, + Broadcom, + Cavium, + Digital, + Infineon, + Motorola, + Nvidia, + AMCC, + Qualcomm, + Marvell, + Intel, +} + +const IMPLEMENTERS: [&'static str; 12] = [ + "Unknown", "Arm", "Broadcom", "Cavium", "Digital", "Infineon", "Motorola", "Nvidia", "AMCC", + "Qualcomm", "Marvell", "Intel", +]; + +enum VariantID { + Unknown, +} + +const VARIANTS: [&'static str; 1] = ["Unknown"]; + +enum ArchitectureID { + Unknown, + V4, + V4T, + V5, + V5T, + V5TE, + V5TEJ, + V6, +} + +const ARCHITECTURES: [&'static str; 8] = + ["Unknown", "v4", "v4T", "v5", "v5T", "v5TE", "v5TEJ", "v6"]; + +enum PartNumberID { + Unknown, + Thunder, + Foundation, + CortexA35, + CortexA53, + CortexA55, + CortexA57, + CortexA72, + CortexA73, + CortexA75, +} + +const PART_NUMBERS: [&'static str; 10] = [ + "Unknown", + "Thunder", + "Foundation", + "Cortex-A35", + "Cortex-A53", + "Cortex-A55", + "Cortex-A57", + "Cortex-A72", + "Cortex-A73", + "Cortex-A75", +]; + +enum RevisionID { + Unknown, + Thunder1_0, + Thunder1_1, +} + +const REVISIONS: [&'static str; 3] = ["Unknown", "Thunder-1.0", "Thunder-1.1"]; + +struct CpuInfo { + implementer: &'static str, + variant: &'static str, + architecture: &'static str, + part_number: &'static str, + revision: &'static str, +} + +impl CpuInfo { + fn new() -> CpuInfo { + let midr = unsafe { control_regs::midr() }; + println!("MIDR: 0x{:x}", midr); + let midr = MachineId(midr); + + let implementer = match midr.get_implementer() { + 0x41 => IMPLEMENTERS[ImplementerID::Arm as usize], + 0x42 => IMPLEMENTERS[ImplementerID::Broadcom as usize], + 0x43 => IMPLEMENTERS[ImplementerID::Cavium as usize], + 0x44 => IMPLEMENTERS[ImplementerID::Digital as usize], + 0x49 => IMPLEMENTERS[ImplementerID::Infineon as usize], + 0x4d => IMPLEMENTERS[ImplementerID::Motorola as usize], + 0x4e => IMPLEMENTERS[ImplementerID::Nvidia as usize], + 0x50 => IMPLEMENTERS[ImplementerID::AMCC as usize], + 0x51 => IMPLEMENTERS[ImplementerID::Qualcomm as usize], + 0x56 => IMPLEMENTERS[ImplementerID::Marvell as usize], + 0x69 => IMPLEMENTERS[ImplementerID::Intel as usize], + _ => IMPLEMENTERS[ImplementerID::Unknown as usize], + }; + + let variant = match midr.get_variant() { + _ => VARIANTS[VariantID::Unknown as usize], + }; + + let architecture = match midr.get_architecture() { + 0b0001 => ARCHITECTURES[ArchitectureID::V4 as usize], + 0b0010 => ARCHITECTURES[ArchitectureID::V4T as usize], + 0b0011 => ARCHITECTURES[ArchitectureID::V5 as usize], + 0b0100 => ARCHITECTURES[ArchitectureID::V5T as usize], + 0b0101 => ARCHITECTURES[ArchitectureID::V5TE as usize], + 0b0110 => ARCHITECTURES[ArchitectureID::V5TEJ as usize], + 0b0111 => ARCHITECTURES[ArchitectureID::V6 as usize], + _ => ARCHITECTURES[ArchitectureID::Unknown as usize], + }; + + let part_number = match midr.get_part_number() { + 0x0a1 => PART_NUMBERS[PartNumberID::Thunder as usize], + 0xd00 => PART_NUMBERS[PartNumberID::Foundation as usize], + 0xd04 => PART_NUMBERS[PartNumberID::CortexA35 as usize], + 0xd03 => PART_NUMBERS[PartNumberID::CortexA53 as usize], + 0xd05 => PART_NUMBERS[PartNumberID::CortexA55 as usize], + 0xd07 => PART_NUMBERS[PartNumberID::CortexA57 as usize], + 0xd08 => PART_NUMBERS[PartNumberID::CortexA72 as usize], + 0xd09 => PART_NUMBERS[PartNumberID::CortexA73 as usize], + 0xd0a => PART_NUMBERS[PartNumberID::CortexA75 as usize], + _ => 
PART_NUMBERS[PartNumberID::Unknown as usize], + }; + + let revision = match part_number { + "Thunder" => { + let val = match midr.get_revision() { + 0x00 => REVISIONS[RevisionID::Thunder1_0 as usize], + 0x01 => REVISIONS[RevisionID::Thunder1_1 as usize], + _ => REVISIONS[RevisionID::Unknown as usize], + }; + val + } + _ => REVISIONS[RevisionID::Unknown as usize], + }; + + CpuInfo { + implementer, + variant, + architecture, + part_number, + revision, + } + } +} + +pub fn cpu_info(w: &mut W) -> Result { + let cpuinfo = CpuInfo::new(); + + writeln!(w, "Implementer: {}", cpuinfo.implementer)?; + writeln!(w, "Variant: {}", cpuinfo.variant)?; + writeln!(w, "Architecture version: {}", cpuinfo.architecture)?; + writeln!(w, "Part Number: {}", cpuinfo.part_number)?; + writeln!(w, "Revision: {}", cpuinfo.revision)?; + writeln!(w)?; + + Ok(()) +} diff --git a/src/arch/aarch64/device/cpu/registers/control_regs.rs b/src/arch/aarch64/device/cpu/registers/control_regs.rs new file mode 100644 index 00000000..461df7bc --- /dev/null +++ b/src/arch/aarch64/device/cpu/registers/control_regs.rs @@ -0,0 +1,111 @@ +#![allow(unused)] + +//! Functions to read and write control registers. + +use core::arch::asm; + +bitflags! { + pub struct MairEl1: u64 { + const DEVICE_MEMORY = 0x00 << 16; + const NORMAL_UNCACHED_MEMORY = 0x44 << 8; + const NORMAL_WRITEBACK_MEMORY = 0xff; + } +} + +pub unsafe fn ttbr0_el1() -> u64 { + let ret: u64; + asm!("mrs {}, ttbr0_el1", out(reg) ret); + ret +} + +pub unsafe fn ttbr0_el1_write(val: u64) { + asm!("msr ttbr0_el1, {}", in(reg) val); +} + +pub unsafe fn ttbr1_el1() -> u64 { + let ret: u64; + asm!("mrs {}, ttbr1_el1", out(reg) ret); + ret +} + +pub unsafe fn ttbr1_el1_write(val: u64) { + asm!("msr ttbr1_el1, {}", in(reg) val); +} + +pub unsafe fn mair_el1() -> MairEl1 { + let ret: u64; + asm!("mrs {}, mair_el1", out(reg) ret); + MairEl1::from_bits_truncate(ret) +} + +pub unsafe fn mair_el1_write(val: MairEl1) { + asm!("msr mair_el1, {}", in(reg) val.bits()); +} + +pub unsafe fn tpidr_el0() -> u64 { + let ret: u64; + asm!("mrs {}, tpidr_el0", out(reg) ret); + ret +} + +pub unsafe fn tpidr_el0_write(val: u64) { + asm!("msr tpidr_el0, {}", in(reg) val); +} + +pub unsafe fn tpidr_el1() -> u64 { + let ret: u64; + asm!("mrs {}, tpidr_el1", out(reg) ret); + ret +} + +pub unsafe fn tpidr_el1_write(val: u64) { + asm!("msr tpidr_el1, {}", in(reg) val); +} + +pub unsafe fn tpidrro_el0() -> u64 { + let ret: u64; + asm!("mrs {}, tpidrro_el0", out(reg) ret); + ret +} + +pub unsafe fn tpidrro_el0_write(val: u64) { + asm!("msr tpidrro_el0, {}", in(reg) val); +} + +pub unsafe fn esr_el1() -> u32 { + let ret: u32; + asm!("mrs {0:w}, esr_el1", out(reg) ret); + ret +} + +pub unsafe fn cntfreq_el0() -> u32 { + let ret: usize; + asm!("mrs {}, cntfrq_el0", out(reg) ret); + ret as u32 +} + +pub unsafe fn tmr_ctrl() -> u32 { + let ret: usize; + asm!("mrs {}, cntp_ctl_el0", out(reg) ret); + ret as u32 +} + +pub unsafe fn tmr_ctrl_write(val: u32) { + asm!("msr cntp_ctl_el0, {}", in(reg) val as usize); +} + +pub unsafe fn tmr_tval() -> u32 { + let ret: usize; + asm!("mrs {0}, cntp_tval_el0", out(reg) ret); + ret as u32 +} + +pub unsafe fn tmr_tval_write(val: u32) { + asm!("msr cntp_tval_el0, {}", in(reg) val as usize); +} + +pub unsafe fn midr() -> u32 { + let ret: usize; + asm!("mrs {}, midr_el1", out(reg) ret); + ret as u32 +} diff --git a/src/arch/aarch64/device/cpu/registers/mod.rs b/src/arch/aarch64/device/cpu/registers/mod.rs new file mode 100644 index 00000000..62f3adbd --- /dev/null +++ 
b/src/arch/aarch64/device/cpu/registers/mod.rs @@ -0,0 +1 @@ +pub mod control_regs; diff --git a/src/arch/aarch64/device/generic_timer.rs b/src/arch/aarch64/device/generic_timer.rs new file mode 100644 index 00000000..c65e1267 --- /dev/null +++ b/src/arch/aarch64/device/generic_timer.rs @@ -0,0 +1,120 @@ +use alloc::boxed::Box; +use log::{debug, error, info}; + +use super::ic_for_chip; +use crate::{ + context, + context::timeout, + device::cpu::registers::control_regs, + dtb::{ + get_interrupt, + irqchip::{register_irq, InterruptHandler, IRQ_CHIP}, + }, + interrupt::irq::trigger, + time, +}; +use fdt::Fdt; + +bitflags! { + struct TimerCtrlFlags: u32 { + const ENABLE = 1 << 0; + const IMASK = 1 << 1; + const ISTATUS = 1 << 2; + } +} + +pub unsafe fn init(fdt: &Fdt) { + let mut timer = GenericTimer { + clk_freq: 0, + reload_count: 0, + }; + timer.init(); + if let Some(node) = fdt.find_compatible(&["arm,armv7-timer"]) { + let irq = get_interrupt(fdt, &node, 1).unwrap(); + debug!("irq = {:?}", irq); + if let Some(ic_idx) = ic_for_chip(&fdt, &node) { + //PHYS_NONSECURE_PPI only + let virq = IRQ_CHIP.irq_chip_list.chips[ic_idx] + .ic + .irq_xlate(irq) + .unwrap(); + info!("generic_timer virq = {}", virq); + register_irq(virq as u32, Box::new(timer)); + IRQ_CHIP.irq_enable(virq as u32); + } else { + error!("Failed to find irq parent for generic timer"); + } + } +} + +pub struct GenericTimer { + pub clk_freq: u32, + pub reload_count: u32, +} + +impl GenericTimer { + pub fn init(&mut self) { + let clk_freq = unsafe { control_regs::cntfreq_el0() }; + self.clk_freq = clk_freq; + self.reload_count = clk_freq / 100; + + unsafe { control_regs::tmr_tval_write(self.reload_count) }; + + let mut ctrl = TimerCtrlFlags::from_bits_truncate(unsafe { control_regs::tmr_ctrl() }); + ctrl.insert(TimerCtrlFlags::ENABLE); + ctrl.remove(TimerCtrlFlags::IMASK); + unsafe { + control_regs::tmr_ctrl_write(ctrl.bits()); + } + } + + #[allow(unused)] + fn disable() { + let mut ctrl = TimerCtrlFlags::from_bits_truncate(unsafe { control_regs::tmr_ctrl() }); + ctrl.remove(TimerCtrlFlags::ENABLE); + unsafe { control_regs::tmr_ctrl_write(ctrl.bits()) }; + } + + #[allow(unused)] + pub fn set_irq(&mut self) { + let mut ctrl = TimerCtrlFlags::from_bits_truncate(unsafe { control_regs::tmr_ctrl() }); + ctrl.remove(TimerCtrlFlags::IMASK); + unsafe { control_regs::tmr_ctrl_write(ctrl.bits()) }; + } + + pub fn clear_irq(&mut self) { + let mut ctrl = TimerCtrlFlags::from_bits_truncate(unsafe { control_regs::tmr_ctrl() }); + + if ctrl.contains(TimerCtrlFlags::ISTATUS) { + ctrl.insert(TimerCtrlFlags::IMASK); + unsafe { control_regs::tmr_ctrl_write(ctrl.bits()) }; + } + } + + pub fn reload_count(&mut self) { + let mut ctrl = TimerCtrlFlags::from_bits_truncate(unsafe { control_regs::tmr_ctrl() }); + ctrl.insert(TimerCtrlFlags::ENABLE); + ctrl.remove(TimerCtrlFlags::IMASK); + unsafe { control_regs::tmr_tval_write(self.reload_count) }; + unsafe { control_regs::tmr_ctrl_write(ctrl.bits()) }; + } +} + +impl InterruptHandler for GenericTimer { + fn irq_handler(&mut self, irq: u32) { + + self.clear_irq(); + { + *time::OFFSET.lock() += self.clk_freq as u128; + } + + timeout::trigger(); + + context::switch::tick(); + + unsafe { + trigger(irq); + } + self.reload_count(); + } +} diff --git a/src/arch/aarch64/device/irqchip/gic.rs b/src/arch/aarch64/device/irqchip/gic.rs new file mode 100644 index 00000000..1507360d --- /dev/null +++ b/src/arch/aarch64/device/irqchip/gic.rs @@ -0,0 +1,266 @@ +use super::InterruptController; +use crate::dtb::{ + 
get_mmio_address, + irqchip::{InterruptHandler, IrqCell, IrqDesc}, +}; +use core::ptr::{read_volatile, write_volatile}; +use fdt::{node::FdtNode, Fdt}; +use log::info; +use syscall::{ + error::{Error, EINVAL}, + Result, +}; + +static GICD_CTLR: u32 = 0x000; +static GICD_TYPER: u32 = 0x004; +static GICD_ISENABLER: u32 = 0x100; +static GICD_ICENABLER: u32 = 0x180; +static GICD_IPRIORITY: u32 = 0x400; +static GICD_ITARGETSR: u32 = 0x800; +static GICD_ICFGR: u32 = 0xc00; + +static GICC_EOIR: u32 = 0x0010; +static GICC_IAR: u32 = 0x000c; +static GICC_CTLR: u32 = 0x0000; +static GICC_PMR: u32 = 0x0004; + +pub struct GenericInterruptController { + pub gic_dist_if: GicDistIf, + pub gic_cpu_if: GicCpuIf, + pub irq_range: (usize, usize), +} + +impl GenericInterruptController { + pub fn new() -> Self { + let gic_dist_if = GicDistIf::default(); + let gic_cpu_if = GicCpuIf::default(); + + GenericInterruptController { + gic_dist_if, + gic_cpu_if, + irq_range: (0, 0), + } + } + pub fn parse(fdt: &Fdt) -> Result<(usize, usize, usize, usize)> { + if let Some(node) = fdt.find_compatible(&["arm,cortex-a15-gic", "arm,gic-400"]) { + return GenericInterruptController::parse_inner(fdt, &node); + } else { + return Err(Error::new(EINVAL)); + } + } + fn parse_inner(fdt: &Fdt, node: &FdtNode) -> Result<(usize, usize, usize, usize)> { + //assert address_cells == 0x2, size_cells == 0x2 + let reg = node.reg().unwrap(); + let mut regs = (0, 0, 0, 0); + let mut idx = 0; + + for chunk in reg { + if chunk.size.is_none() { + break; + } + let addr = get_mmio_address(fdt, node, &chunk).unwrap(); + match idx { + 0 => (regs.0, regs.1) = (addr, chunk.size.unwrap()), + 2 => (regs.2, regs.3) = (addr, chunk.size.unwrap()), + _ => break, + } + idx += 2; + } + + if idx == 4 { + Ok(regs) + } else { + Err(Error::new(EINVAL)) + } + } +} + +impl InterruptHandler for GenericInterruptController { + fn irq_handler(&mut self, _irq: u32) {} +} + +impl InterruptController for GenericInterruptController { + fn irq_init( + &mut self, + fdt_opt: Option<&Fdt>, + irq_desc: &mut [IrqDesc; 1024], + ic_idx: usize, + irq_idx: &mut usize, + ) -> Result<()> { + if let Some(fdt) = fdt_opt { + let (dist_addr, _dist_size, cpu_addr, _cpu_size) = + match GenericInterruptController::parse(fdt) { + Ok(regs) => regs, + Err(err) => return Err(err), + }; + + unsafe { + self.gic_dist_if.init(crate::PHYS_OFFSET + dist_addr); + self.gic_cpu_if.init(crate::PHYS_OFFSET + cpu_addr); + } + } + let idx = *irq_idx; + let cnt = if self.gic_dist_if.nirqs > 1024 { + 1024 + } else { + self.gic_dist_if.nirqs as usize + }; + let mut i: usize = 0; + //only support linear irq map now. 
+ while i < cnt && (idx + i < 1024) { + irq_desc[idx + i].basic.ic_idx = ic_idx; + irq_desc[idx + i].basic.ic_irq = i as u32; + irq_desc[idx + i].basic.used = true; + + i += 1; + } + + info!("gic irq_range = ({}, {})", idx, idx + cnt); + self.irq_range = (idx, idx + cnt); + *irq_idx = idx + cnt; + Ok(()) + } + fn irq_ack(&mut self) -> u32 { + unsafe { self.gic_cpu_if.irq_ack() } + } + fn irq_eoi(&mut self, irq_num: u32) { + unsafe { self.gic_cpu_if.irq_eoi(irq_num) } + } + fn irq_enable(&mut self, irq_num: u32) { + unsafe { self.gic_dist_if.irq_enable(irq_num) } + } + fn irq_disable(&mut self, irq_num: u32) { + unsafe { self.gic_dist_if.irq_disable(irq_num) } + } + fn irq_xlate(&self, irq_data: IrqCell) -> Result { + let off = match irq_data { + IrqCell::L3(0, irq, _flags) => irq as usize + 32, // SPI + IrqCell::L3(1, irq, _flags) => irq as usize + 16, // PPI + _ => return Err(Error::new(EINVAL)), + }; + return Ok(off + self.irq_range.0); + } + fn irq_to_virq(&self, hwirq: u32) -> Option { + if hwirq >= self.gic_dist_if.nirqs { + None + } else { + Some(self.irq_range.0 + hwirq as usize) + } + } +} + +#[derive(Debug, Default)] +pub struct GicDistIf { + pub address: usize, + pub ncpus: u32, + pub nirqs: u32, +} + +impl GicDistIf { + pub unsafe fn init(&mut self, addr: usize) { + self.address = addr; + + // Disable IRQ Distribution + self.write(GICD_CTLR, 0); + + let typer = self.read(GICD_TYPER); + self.ncpus = ((typer & (0x7 << 5)) >> 5) + 1; + self.nirqs = ((typer & 0x1f) + 1) * 32; + info!( + "gic: Distributor supports {:?} CPUs and {:?} IRQs", + self.ncpus, self.nirqs + ); + + // Set all SPIs to level triggered + for irq in (32..self.nirqs).step_by(16) { + self.write(GICD_ICFGR + ((irq / 16) * 4), 0); + } + + // Disable all SPIs + for irq in (32..self.nirqs).step_by(32) { + self.write(GICD_ICENABLER + ((irq / 32) * 4), 0xffff_ffff); + } + + // Affine all SPIs to CPU0 and set priorities for all IRQs + for irq in 0..self.nirqs { + if irq > 31 { + let ext_offset = GICD_ITARGETSR + (4 * (irq / 4)); + let int_offset = irq % 4; + let mut val = self.read(ext_offset); + val |= 0b0000_0001 << (8 * int_offset); + self.write(ext_offset, val); + } + + let ext_offset = GICD_IPRIORITY + (4 * (irq / 4)); + let int_offset = irq % 4; + let mut val = self.read(ext_offset); + val |= 0b0000_0000 << (8 * int_offset); + self.write(ext_offset, val); + } + + // Enable IRQ group 0 and group 1 non-secure distribution + self.write(GICD_CTLR, 0x3); + } + + pub unsafe fn irq_enable(&mut self, irq: u32) { + let offset = GICD_ISENABLER + (4 * (irq / 32)); + let shift = 1 << (irq % 32); + let mut val = self.read(offset); + val |= shift; + self.write(offset, val); + } + + pub unsafe fn irq_disable(&mut self, irq: u32) { + let offset = GICD_ICENABLER + (4 * (irq / 32)); + let shift = 1 << (irq % 32); + let mut val = self.read(offset); + val |= shift; + self.write(offset, val); + } + + unsafe fn read(&self, reg: u32) -> u32 { + let val = read_volatile((self.address + reg as usize) as *const u32); + val + } + + unsafe fn write(&mut self, reg: u32, value: u32) { + write_volatile((self.address + reg as usize) as *mut u32, value); + } +} + +#[derive(Debug, Default)] +pub struct GicCpuIf { + pub address: usize, +} + +impl GicCpuIf { + pub unsafe fn init(&mut self, addr: usize) { + self.address = addr; + + // Enable CPU0's GIC interface + self.write(GICC_CTLR, 1); + // Set CPU0's Interrupt Priority Mask + self.write(GICC_PMR, 0xff); + } + + unsafe fn irq_ack(&mut self) -> u32 { + let irq = self.read(GICC_IAR) & 0x1ff; + if irq 
== 1023 { + panic!("irq_ack: got ID 1023!!!"); + } + irq + } + + unsafe fn irq_eoi(&mut self, irq: u32) { + self.write(GICC_EOIR, irq); + } + + unsafe fn read(&self, reg: u32) -> u32 { + let val = read_volatile((self.address + reg as usize) as *const u32); + val + } + + unsafe fn write(&mut self, reg: u32, value: u32) { + write_volatile((self.address + reg as usize) as *mut u32, value); + } +} diff --git a/src/arch/aarch64/device/irqchip/gicv3.rs b/src/arch/aarch64/device/irqchip/gicv3.rs new file mode 100644 index 00000000..076965d6 --- /dev/null +++ b/src/arch/aarch64/device/irqchip/gicv3.rs @@ -0,0 +1,187 @@ +use alloc::vec::Vec; +use core::arch::asm; +use fdt::{node::NodeProperty, Fdt}; + +use super::{gic::GicDistIf, InterruptController}; +use crate::dtb::{ + get_mmio_address, + irqchip::{InterruptHandler, IrqCell, IrqDesc}, +}; +use syscall::{ + error::{Error, EINVAL}, + Result, +}; + +#[derive(Debug)] +pub struct GicV3 { + pub gic_dist_if: GicDistIf, + pub gic_cpu_if: GicV3CpuIf, + pub gicrs: Vec<(usize, usize)>, + //TODO: GICC, GICH, GICV? + pub irq_range: (usize, usize), +} + +impl GicV3 { + pub fn new() -> Self { + GicV3 { + gic_dist_if: GicDistIf::default(), + gic_cpu_if: GicV3CpuIf, + gicrs: Vec::new(), + irq_range: (0, 0), + } + } + + pub fn parse(&mut self, fdt: &Fdt) -> Result<()> { + let Some(node) = fdt.find_compatible(&["arm,gic-v3"]) else { + return Err(Error::new(EINVAL)); + }; + + // Clear current registers + //TODO: deinit? + self.gic_dist_if.address = 0; + self.gicrs.clear(); + + // Get number of GICRs + let gicrs = node + .property("#redistributor-regions") + .and_then(NodeProperty::as_usize) + .unwrap_or(1); + + // Read registers + let mut chunks = node.reg().unwrap(); + if let Some(gicd) = chunks.next() + && let Some(addr) = get_mmio_address(fdt, &node, &gicd) + { + unsafe { + self.gic_dist_if.init(crate::PHYS_OFFSET + addr); + } + } + for _ in 0..gicrs { + if let Some(gicr) = chunks.next() { + self.gicrs.push(( + get_mmio_address(fdt, &node, &gicr).unwrap(), + gicr.size.unwrap(), + )); + } + } + + if self.gic_dist_if.address == 0 || self.gicrs.is_empty() { + Err(Error::new(EINVAL)) + } else { + Ok(()) + } + } +} + +impl InterruptHandler for GicV3 { + fn irq_handler(&mut self, _irq: u32) {} +} + +impl InterruptController for GicV3 { + fn irq_init( + &mut self, + fdt_opt: Option<&Fdt>, + irq_desc: &mut [IrqDesc; 1024], + ic_idx: usize, + irq_idx: &mut usize, + ) -> Result<()> { + if let Some(fdt) = fdt_opt { + self.parse(fdt)?; + } + log::info!("{:X?}", self); + + unsafe { + self.gic_cpu_if.init(); + } + let idx = *irq_idx; + let cnt = if self.gic_dist_if.nirqs > 1024 { + 1024 + } else { + self.gic_dist_if.nirqs as usize + }; + let mut i: usize = 0; + //only support linear irq map now. 
+ while i < cnt && (idx + i < 1024) { + irq_desc[idx + i].basic.ic_idx = ic_idx; + irq_desc[idx + i].basic.ic_irq = i as u32; + irq_desc[idx + i].basic.used = true; + + i += 1; + } + + log::info!("gic irq_range = ({}, {})", idx, idx + cnt); + self.irq_range = (idx, idx + cnt); + *irq_idx = idx + cnt; + Ok(()) + } + fn irq_ack(&mut self) -> u32 { + let irq_num = unsafe { self.gic_cpu_if.irq_ack() }; + irq_num + } + fn irq_eoi(&mut self, irq_num: u32) { + unsafe { self.gic_cpu_if.irq_eoi(irq_num) } + } + fn irq_enable(&mut self, irq_num: u32) { + unsafe { self.gic_dist_if.irq_enable(irq_num) } + } + fn irq_disable(&mut self, irq_num: u32) { + unsafe { self.gic_dist_if.irq_disable(irq_num) } + } + fn irq_xlate(&self, irq_data: IrqCell) -> Result { + let off = match irq_data { + IrqCell::L3(0, irq, _flags) => irq as usize + 32, // SPI + IrqCell::L3(1, irq, _flags) => irq as usize + 16, // PPI + _ => return Err(Error::new(EINVAL)), + }; + return Ok(off + self.irq_range.0); + } + fn irq_to_virq(&self, hwirq: u32) -> Option { + if hwirq >= self.gic_dist_if.nirqs { + None + } else { + Some(self.irq_range.0 + hwirq as usize) + } + } +} + +#[derive(Debug)] +pub struct GicV3CpuIf; + +impl GicV3CpuIf { + pub unsafe fn init(&mut self) { + // Enable system register access + { + let value = 1_usize; + asm!("msr icc_sre_el1, {}", in(reg) value); + } + // Set control register + { + let value = 0_usize; + asm!("msr icc_ctlr_el1, {}", in(reg) value); + } + // Enable non-secure group 1 + { + let value = 1_usize; + asm!("msr icc_igrpen1_el1, {}", in(reg) value); + } + // Set CPU0's Interrupt Priority Mask + { + let value = 0xFF_usize; + asm!("msr icc_pmr_el1, {}", in(reg) value); + } + } + + unsafe fn irq_ack(&mut self) -> u32 { + let mut irq: usize; + asm!("mrs {}, icc_iar1_el1", out(reg) irq); + irq &= 0x1ff; + if irq == 1023 { + panic!("irq_ack: got ID 1023!!!"); + } + irq as u32 + } + + unsafe fn irq_eoi(&mut self, irq: u32) { + asm!("msr icc_eoir1_el1, {}", in(reg) irq as usize); + } +} diff --git a/src/arch/aarch64/device/irqchip/irq_bcm2835.rs b/src/arch/aarch64/device/irqchip/irq_bcm2835.rs new file mode 100644 index 00000000..a8a118bd --- /dev/null +++ b/src/arch/aarch64/device/irqchip/irq_bcm2835.rs @@ -0,0 +1,286 @@ +use core::ptr::{read_volatile, write_volatile}; +use fdt::{node::FdtNode, Fdt}; +use log::{debug, error, info}; + +use super::InterruptController; +use crate::dtb::{ + get_interrupt, get_mmio_address, + irqchip::{InterruptHandler, IrqCell, IrqDesc, IRQ_CHIP}, +}; +use syscall::{ + error::{Error, EINVAL}, + Result, +}; + +#[inline(always)] +fn ffs(num: u32) -> u32 { + let mut x = num; + if x == 0 { + return 0; + } + let mut r = 1; + if (x & 0xffff) == 0 { + x >>= 16; + r += 16; + } + if (x & 0xff) == 0 { + x >>= 8; + r += 8; + } + if (x & 0xf) == 0 { + x >>= 4; + r += 4; + } + if (x & 0x3) == 0 { + x >>= 2; + r += 2; + } + if (x & 0x1) == 0 { + r += 1; + } + + r +} + +const PENDING_0: u32 = 0x0; +const PENDING_1: u32 = 0x4; +const PENDING_2: u32 = 0x8; +const ENABLE_0: u32 = 0x18; +const ENABLE_1: u32 = 0x10; +const ENABLE_2: u32 = 0x14; +const DISABLE_0: u32 = 0x24; +const DISABLE_1: u32 = 0x1c; +const DISABLE_2: u32 = 0x20; + +pub struct Bcm2835ArmInterruptController { + pub address: usize, + pub irq_range: (usize, usize), +} + +impl Bcm2835ArmInterruptController { + pub fn new() -> Self { + Bcm2835ArmInterruptController { + address: 0, + irq_range: (0, 0), + } + } + pub fn parse(fdt: &Fdt) -> Result<(usize, usize, Option)> { + if let Some(node) = 
fdt.find_compatible(&["brcm,bcm2836-armctrl-ic"]) { + return unsafe { Bcm2835ArmInterruptController::parse_inner(fdt, &node) }; + } else { + return Err(Error::new(EINVAL)); + } + } + unsafe fn parse_inner(fdt: &Fdt, node: &FdtNode) -> Result<(usize, usize, Option)> { + //assert address_cells == 0x1, size_cells == 0x1 + let mem = node.reg().unwrap().nth(0).unwrap(); + let base = get_mmio_address(fdt, node, &mem).unwrap(); + let size = mem.size.unwrap() as u32; + let mut ret_virq = None; + + if let Some(interrupt_parent) = node.property("interrupt-parent") { + let phandle = interrupt_parent.as_usize().unwrap() as u32; + let irq = get_interrupt(fdt, node, 0).unwrap(); + let ic_idx = IRQ_CHIP.phandle_to_ic_idx(phandle).unwrap(); + //PHYS_NONSECURE_PPI only + let virq = IRQ_CHIP.irq_chip_list.chips[ic_idx] + .ic + .irq_xlate(irq) + .unwrap(); + info!("register bcm2835arm_ctrl as ic_idx {}'s child virq = {}", ic_idx, virq); + ret_virq = Some(virq); + } + Ok((base as usize, size as usize, ret_virq)) + } + + unsafe fn init(&mut self) { + debug!("IRQ BCM2835 INIT"); + //disable all interrupt + self.write(DISABLE_0, 0xffff_ffff); + self.write(DISABLE_1, 0xffff_ffff); + self.write(DISABLE_2, 0xffff_ffff); + + debug!("IRQ BCM2835 END"); + } + + unsafe fn read(&self, reg: u32) -> u32 { + let val = read_volatile((self.address + reg as usize) as *const u32); + val + } + + unsafe fn write(&mut self, reg: u32, value: u32) { + write_volatile((self.address + reg as usize) as *mut u32, value); + } +} + +impl InterruptController for Bcm2835ArmInterruptController { + fn irq_init( + &mut self, + fdt_opt: Option<&Fdt>, + irq_desc: &mut [IrqDesc; 1024], + ic_idx: usize, + irq_idx: &mut usize, + ) -> Result<()> { + let (base, _size, _virq) = match Bcm2835ArmInterruptController::parse(fdt_opt.unwrap()) { + Ok((a, b, c)) => (a, b, c), + Err(_) => return Err(Error::new(EINVAL)), + }; + unsafe { + self.address = base + crate::PHYS_OFFSET; + + self.init(); + let idx = *irq_idx; + let cnt = 3 << 5; //3 * 32 irqs, basic == 8, reg1 = 32, reg2 = 32 + let mut i: usize = 0; + //only support linear irq map now. 
+ while i < cnt && (idx + i < 1024) { + irq_desc[idx + i].basic.ic_idx = ic_idx; + irq_desc[idx + i].basic.ic_irq = i as u32; + irq_desc[idx + i].basic.used = true; + + i += 1; + } + + info!("bcm2835 irq_range = ({}, {})", idx, idx + cnt); + self.irq_range = (idx, idx + cnt); + *irq_idx = idx + cnt; + } + + Ok(()) + } + + fn irq_ack(&mut self) -> u32 { + //TODO: support smp self.read(LOCAL_IRQ_PENDING + 4 * cpu) + let sources = unsafe { self.read(PENDING_0) }; + let pending_num = ffs(sources) - 1; + let fast_irq = [ + 7 + 32, + 9 + 32, + 10 + 32, + 18 + 32, + 19 + 32, + 21 + 64, + 22 + 64, + 23 + 64, + 24 + 64, + 25 + 64, + 30 + 64, + ]; + + //fast irq + if pending_num >= 10 && pending_num <= 20 { + return fast_irq[(pending_num - 10) as usize]; + } + + let pending_num = ffs(sources & 0x3ff) - 1; + match pending_num { + num @ 0..=7 => return num, + 8 => { + let sources1 = unsafe { self.read(PENDING_1) }; + let irq_0_31 = ffs(sources1) - 1; + return irq_0_31 + 32; + } + 9 => { + let sources2 = unsafe { self.read(PENDING_2) }; + let irq_32_63 = ffs(sources2) - 1; + return irq_32_63 + 64; + } + num => { + error!( + "unexpected irq pending in BASIC PENDING: 0x{}, sources = 0x{:08x}", + num, sources + ); + return num; + } + } + } + + fn irq_eoi(&mut self, _irq_num: u32) {} + + fn irq_enable(&mut self, irq_num: u32) { + debug!("bcm2835 enable {} {}", irq_num, irq_num & 0x1f); + match irq_num { + num @ 0..=31 => { + let val = 1 << num; + unsafe { + self.write(ENABLE_0, val); + } + } + num @ 32..=63 => { + let val = 1 << (num & 0x1f); + unsafe { + self.write(ENABLE_1, val); + } + } + num @ 64..=95 => { + let val = 1 << (num & 0x1f); + unsafe { + self.write(ENABLE_2, val); + } + } + _ => return, + } + } + + fn irq_disable(&mut self, irq_num: u32) { + match irq_num { + num @ 0..=31 => { + let val = 1 << num; + unsafe { + self.write(DISABLE_0, val); + } + } + num @ 32..=63 => { + let val = 1 << (num & 0x1f); + unsafe { + self.write(DISABLE_1, val); + } + } + num @ 64..=95 => { + let val = 1 << (num & 0x1f); + unsafe { + self.write(DISABLE_2, val); + } + } + _ => return, + } + } + fn irq_xlate(&self, irq_data: IrqCell) -> Result { + //assert interrupt-cells == 0x2 + match irq_data { + IrqCell::L2(bank, irq) => { + //TODO: check bank && irq + let hwirq = (bank as usize) << 5 | (irq as usize); + let off = hwirq + self.irq_range.0; + Ok(off) + } + _ => Err(Error::new(EINVAL)), + } + } + + fn irq_to_virq(&self, hwirq: u32) -> Option { + if hwirq > 95 { + None + } else { + Some(self.irq_range.0 + hwirq as usize) + } + } +} + +impl InterruptHandler for Bcm2835ArmInterruptController { + fn irq_handler(&mut self, _irq: u32) { + unsafe { + let irq = self.irq_ack(); + if let Some(virq) = self.irq_to_virq(irq) + && virq < 1024 + { + if let Some(handler) = &mut IRQ_CHIP.irq_desc[virq].handler { + handler.irq_handler(virq as u32); + } + } else { + error!("unexpected irq num {}", irq); + } + self.irq_eoi(irq); + } + } +} diff --git a/src/arch/aarch64/device/irqchip/irq_bcm2836.rs b/src/arch/aarch64/device/irqchip/irq_bcm2836.rs new file mode 100644 index 00000000..730555ce --- /dev/null +++ b/src/arch/aarch64/device/irqchip/irq_bcm2836.rs @@ -0,0 +1,221 @@ +use super::InterruptController; +use crate::{arch::device::{ROOT_IC_IDX, ROOT_IC_IDX_IS_SET}, dtb::{ + get_mmio_address, + irqchip::{InterruptHandler, IrqCell, IrqDesc}, +}}; +use core::{ + arch::asm, + ptr::{read_volatile, write_volatile}, sync::atomic::Ordering, +}; +use fdt::{node::FdtNode, Fdt}; +use log::{debug, info}; +use syscall::{ + error::{Error, 
EINVAL}, + Result, +}; + +const LOCAL_CONTROL: u32 = 0x000; +const LOCAL_PRESCALER: u32 = 0x008; +const LOCAL_GPU_ROUTING: u32 = 0x00C; +const LOCAL_TIMER_INT_CONTROL0: u32 = 0x040; +const LOCAL_IRQ_PENDING: u32 = 0x060; + +const LOCAL_IRQ_CNTPNSIRQ: u32 = 0x1; +const LOCAL_IRQ_GPU_FAST: u32 = 0x8; +const LOCAL_IRQ_PMU_FAST: u32 = 0x9; +const LOCAL_IRQ_LAST: u32 = LOCAL_IRQ_PMU_FAST; + +#[inline(always)] +fn ffs(num: u32) -> u32 { + let mut x = num; + if x == 0 { + return 0; + } + let mut r = 1; + if (x & 0xffff) == 0 { + x >>= 16; + r += 16; + } + if (x & 0xff) == 0 { + x >>= 8; + r += 8; + } + if (x & 0xf) == 0 { + x >>= 4; + r += 4; + } + if (x & 0x3) == 0 { + x >>= 2; + r += 2; + } + if (x & 0x1) == 0 { + r += 1; + } + + r +} + +pub struct Bcm2836ArmInterruptController { + pub address: usize, + pub irq_range: (usize, usize), + pub active_cpu: u32, +} + +impl Bcm2836ArmInterruptController { + pub fn new() -> Self { + Bcm2836ArmInterruptController { + address: 0, + irq_range: (0, 0), + active_cpu: 0, + } + } + pub fn parse(fdt: &Fdt) -> Result<(usize, usize)> { + if let Some(node) = fdt.find_compatible(&["brcm,bcm2836-l1-intc"]) { + return Bcm2836ArmInterruptController::parse_inner(fdt, &node); + } else { + return Err(Error::new(EINVAL)); + } + } + fn parse_inner(fdt: &Fdt, node: &FdtNode) -> Result<(usize, usize)> { + //assert address_cells == 0x1, size_cells == 0x1 + let reg = node.reg().unwrap().nth(0).unwrap(); + let addr = get_mmio_address(fdt, node, ®).unwrap(); + + Ok((addr, reg.size.unwrap())) + } + + unsafe fn init(&mut self) { + debug!("IRQ BCM2836 INIT"); + //init local timer freq + self.write(LOCAL_CONTROL, 0x0); + self.write(LOCAL_PRESCALER, 0x8000_0000); + + //routing all irq to core + self.write(LOCAL_GPU_ROUTING, self.active_cpu); + debug!("routing all irq to core {}", self.active_cpu); + debug!("IRQ BCM2836 END"); + } + + unsafe fn read(&self, reg: u32) -> u32 { + let val = read_volatile((self.address + reg as usize) as *const u32); + val + } + + unsafe fn write(&mut self, reg: u32, value: u32) { + write_volatile((self.address + reg as usize) as *mut u32, value); + } +} + +impl InterruptHandler for Bcm2836ArmInterruptController { + fn irq_handler(&mut self, _irq: u32) {} +} + +impl InterruptController for Bcm2836ArmInterruptController { + fn irq_init( + &mut self, + fdt_opt: Option<&Fdt>, + irq_desc: &mut [IrqDesc; 1024], + ic_idx: usize, + irq_idx: &mut usize, + ) -> Result<()> { + let (base, _size) = match Bcm2836ArmInterruptController::parse(fdt_opt.unwrap()) { + Ok((a, b)) => (a, b), + Err(_) => return Err(Error::new(EINVAL)), + }; + unsafe { + self.address = base + crate::PHYS_OFFSET; + let cpuid: usize; + asm!("mrs {}, mpidr_el1", out(reg) cpuid); + self.active_cpu = cpuid as u32 & 0x3; + + self.init(); + let idx = *irq_idx; + let cnt = LOCAL_IRQ_LAST as usize; + let mut i: usize = 0; + //only support linear irq map now. + while i < cnt && (idx + i < 1024) { + irq_desc[idx + i].basic.ic_idx = ic_idx; + irq_desc[idx + i].basic.ic_irq = i as u32; + irq_desc[idx + i].basic.used = true; + + i += 1; + } + + info!("bcm2836 irq_range = ({}, {})", idx, idx + cnt); + self.irq_range = (idx, idx + cnt); + *irq_idx = idx + cnt; + } + + //raspi 3b+ dts doesn't follow the rule to set root parent interrupt controller + //so we should set it manually. 
+ ROOT_IC_IDX.store(ic_idx, Ordering::Relaxed); + ROOT_IC_IDX_IS_SET.store(1, Ordering::Relaxed); + + Ok(()) + } + + fn irq_ack(&mut self) -> u32 { + let cpuid: usize; + unsafe { + asm!("mrs {}, mpidr_el1", out(reg) cpuid); + } + let cpu = cpuid as u32 & 0x3; + let sources: u32 = unsafe { self.read(LOCAL_IRQ_PENDING + 4 * cpu) }; + ffs(sources) - 1 + } + + fn irq_eoi(&mut self, _irq_num: u32) {} + + fn irq_enable(&mut self, irq_num: u32) { + debug!("bcm2836 enable {}", irq_num); + match irq_num { + LOCAL_IRQ_CNTPNSIRQ => unsafe { + let cpuid: usize; + asm!("mrs {}, mpidr_el1", out(reg) cpuid); + let cpu = cpuid as u32 & 0x3; + let mut reg_val = self.read(LOCAL_TIMER_INT_CONTROL0 + 4 * cpu); + reg_val |= 0x2; + self.write(LOCAL_TIMER_INT_CONTROL0 + 4 * cpu, reg_val); + }, + LOCAL_IRQ_GPU_FAST => { + //GPU IRQ always enable + } + _ => { + //ignore + } + } + } + + fn irq_disable(&mut self, irq_num: u32) { + match irq_num { + LOCAL_IRQ_CNTPNSIRQ => unsafe { + let cpuid: usize; + asm!("mrs {}, mpidr_el1", out(reg) cpuid); + let cpu = cpuid as u32 & 0x3; + let mut reg_val = self.read(LOCAL_TIMER_INT_CONTROL0 + 4 * cpu); + reg_val &= !0x2; + self.write(LOCAL_TIMER_INT_CONTROL0 + 4 * cpu, reg_val); + }, + LOCAL_IRQ_GPU_FAST => { + //GPU IRQ always enable + } + _ => { + //ignore + } + } + } + fn irq_xlate(&self, irq_data: IrqCell) -> Result { + //assert interrupt-cells == 0x2 + match irq_data { + IrqCell::L2(irq, _) => Ok(irq as usize + self.irq_range.0), + _ => Err(Error::new(EINVAL)), + } + } + fn irq_to_virq(&self, hwirq: u32) -> Option { + if hwirq > LOCAL_IRQ_LAST { + None + } else { + Some(self.irq_range.0 + hwirq as usize) + } + } +} diff --git a/src/arch/aarch64/device/irqchip/mod.rs b/src/arch/aarch64/device/irqchip/mod.rs new file mode 100644 index 00000000..05f890d0 --- /dev/null +++ b/src/arch/aarch64/device/irqchip/mod.rs @@ -0,0 +1,41 @@ +use crate::dtb::irqchip::{InterruptController, IRQ_CHIP}; +use alloc::boxed::Box; +use fdt::{node::FdtNode, Fdt}; + +pub(crate) mod gic; +pub(crate) mod gicv3; +mod irq_bcm2835; +mod irq_bcm2836; +mod null; + +pub(crate) fn new_irqchip(ic_str: &str) -> Option> { + if ic_str.contains("arm,gic-v3") { + Some(Box::new(gicv3::GicV3::new())) + } else if ic_str.contains("arm,cortex-a15-gic") || ic_str.contains("arm,gic-400") { + Some(Box::new(gic::GenericInterruptController::new())) + } else if ic_str.contains("brcm,bcm2836-l1-intc") { + Some(Box::new(irq_bcm2836::Bcm2836ArmInterruptController::new())) + } else if ic_str.contains("brcm,bcm2836-armctrl-ic") { + Some(Box::new(irq_bcm2835::Bcm2835ArmInterruptController::new())) + } else { + log::warn!("no driver for interrupt controller {:?}", ic_str); + //TODO: return None and handle it properly + Some(Box::new(null::Null)) + } +} + +pub(crate) fn ic_for_chip(fdt: &Fdt, node: &FdtNode) -> Option { + if let Some(_) = node.property("interrupts-extended") { + log::error!("multi-parented device not supported"); + None + } else if let Some(irqc_phandle) = node + .property("interrupt-parent") + .or(fdt.root().property("interrupt-parent")) + .and_then(|f| f.as_usize()) + { + unsafe { IRQ_CHIP.phandle_to_ic_idx(irqc_phandle as u32) } + } else { + log::error!("no irq parent found"); + None + } +} diff --git a/src/arch/aarch64/device/irqchip/null.rs b/src/arch/aarch64/device/irqchip/null.rs new file mode 100644 index 00000000..706e1875 --- /dev/null +++ b/src/arch/aarch64/device/irqchip/null.rs @@ -0,0 +1,38 @@ +use fdt::Fdt; +use syscall::{ + error::{Error, EINVAL}, + Result, +}; + +use super::InterruptController; 
+use crate::dtb::irqchip::{InterruptHandler, IrqCell, IrqDesc}; + +pub struct Null; + +impl InterruptHandler for Null { + fn irq_handler(&mut self, _irq: u32) {} +} + +impl InterruptController for Null { + fn irq_init( + &mut self, + _fdt_opt: Option<&Fdt>, + _irq_desc: &mut [IrqDesc; 1024], + _ic_idx: usize, + _irq_idx: &mut usize, + ) -> Result<()> { + Ok(()) + } + fn irq_ack(&mut self) -> u32 { + unimplemented!() + } + fn irq_eoi(&mut self, _irq_num: u32) {} + fn irq_enable(&mut self, _irq_num: u32) {} + fn irq_disable(&mut self, _irq_num: u32) {} + fn irq_xlate(&self, _irq_data: IrqCell) -> Result { + Err(Error::new(EINVAL)) + } + fn irq_to_virq(&self, _hwirq: u32) -> Option { + None + } +} diff --git a/src/arch/aarch64/device/mod.rs b/src/arch/aarch64/device/mod.rs new file mode 100644 index 00000000..7699fc9e --- /dev/null +++ b/src/arch/aarch64/device/mod.rs @@ -0,0 +1,53 @@ +use crate::info; +use core::sync::atomic::{AtomicUsize, Ordering}; +use fdt::Fdt; + +pub mod cpu; +pub mod generic_timer; +pub mod irqchip; +pub mod rtc; +pub mod serial; +pub mod uart_pl011; + +use crate::dtb::irqchip::IRQ_CHIP; +use irqchip::ic_for_chip; + +pub static ROOT_IC_IDX: AtomicUsize = AtomicUsize::new(0); +pub static ROOT_IC_IDX_IS_SET: AtomicUsize = AtomicUsize::new(0); + +unsafe fn init_root_ic(fdt: &Fdt) { + + let is_set = ROOT_IC_IDX_IS_SET.load(Ordering::Relaxed); + if is_set != 0 { + let ic_idx = ROOT_IC_IDX.load(Ordering::Relaxed); + info!("Already selected {} as root ic", ic_idx); + return ; + } + + let root_irqc_phandle = fdt + .root() + .property("interrupt-parent") + .unwrap() + .as_usize() + .unwrap(); + let ic_idx = IRQ_CHIP + .phandle_to_ic_idx(root_irqc_phandle as u32) + .unwrap(); + info!("select {} as root ic", ic_idx); + ROOT_IC_IDX.store(ic_idx, Ordering::Relaxed); +} + +pub unsafe fn init_devicetree(fdt: &Fdt) { + info!("IRQCHIP INIT"); + crate::dtb::irqchip::init(&fdt); + init_root_ic(&fdt); + info!("GIT INIT"); + generic_timer::init(fdt); + info!("SERIAL INIT"); + serial::init(fdt); + info!("RTC INIT"); + rtc::init(fdt); +} + +#[derive(Default)] +pub struct ArchPercpuMisc; diff --git a/src/arch/aarch64/device/rtc.rs b/src/arch/aarch64/device/rtc.rs new file mode 100644 index 00000000..6553ace3 --- /dev/null +++ b/src/arch/aarch64/device/rtc.rs @@ -0,0 +1,40 @@ +use crate::{dtb::get_mmio_address, time}; +use core::ptr::read_volatile; + +static RTC_DR: usize = 0x000; + +pub unsafe fn init(fdt: &fdt::Fdt) { + if let Some(node) = fdt.find_compatible(&["arm,pl031"]) { + match node + .reg() + .and_then(|mut iter| iter.next()) + .and_then(|region| get_mmio_address(fdt, &node, ®ion)) + { + Some(phys) => { + let mut rtc = Pl031rtc { phys }; + log::info!("PL031 RTC at {:#x}", rtc.phys); + *time::START.lock() = (rtc.time() as u128) * time::NANOS_PER_SEC; + } + None => { + log::warn!("No PL031 RTC registers"); + } + } + } else { + log::warn!("No PL031 RTC found"); + } +} + +struct Pl031rtc { + pub phys: usize, +} + +impl Pl031rtc { + unsafe fn read(&self, reg: usize) -> u32 { + read_volatile((crate::PHYS_OFFSET + self.phys + reg) as *const u32) + } + + pub fn time(&mut self) -> u64 { + let seconds = unsafe { self.read(RTC_DR) } as u64; + seconds + } +} diff --git a/src/arch/aarch64/device/serial.rs b/src/arch/aarch64/device/serial.rs new file mode 100644 index 00000000..a81287f0 --- /dev/null +++ b/src/arch/aarch64/device/serial.rs @@ -0,0 +1,138 @@ +use alloc::boxed::Box; +use spin::Mutex; + +use crate::{ + arch::device::irqchip::ic_for_chip, + device::uart_pl011, + devices::uart_16550, + 
dtb::{ + diag_uart_range, get_interrupt, + irqchip::{register_irq, InterruptHandler, IRQ_CHIP}, + }, + interrupt::irq::trigger, + scheme::debug::{debug_input, debug_notify}, +}; +use fdt::Fdt; +use log::{error, info}; +use syscall::Mmio; + +pub enum SerialKind { + Ns16550u8(&'static mut uart_16550::SerialPort>), + Ns16550u32(&'static mut uart_16550::SerialPort>), + Pl011(uart_pl011::SerialPort), +} + +impl SerialKind { + pub fn enable_irq(&mut self) { + //TODO: implement for NS16550 + match self { + Self::Ns16550u8(_) => {} + Self::Ns16550u32(_) => {} + Self::Pl011(inner) => inner.enable_irq(), + } + } + + pub fn receive(&mut self) { + //TODO: make PL011 receive work the same way as NS16550 + match self { + Self::Ns16550u8(inner) => { + while let Some(c) = inner.receive() { + debug_input(c); + } + debug_notify(); + } + Self::Ns16550u32(inner) => { + while let Some(c) = inner.receive() { + debug_input(c); + } + debug_notify(); + } + Self::Pl011(inner) => inner.receive(), + } + } + + pub fn write(&mut self, buf: &[u8]) { + match self { + Self::Ns16550u8(inner) => inner.write(buf), + Self::Ns16550u32(inner) => inner.write(buf), + Self::Pl011(inner) => inner.write(buf), + } + } +} + +pub static COM1: Mutex> = Mutex::new(None); + +pub struct Com1Irq {} + +impl InterruptHandler for Com1Irq { + fn irq_handler(&mut self, irq: u32) { + if let Some(ref mut serial_port) = *COM1.lock() { + serial_port.receive(); + }; + unsafe { + trigger(irq); + } + } +} + +pub unsafe fn init_early(dtb: &Fdt) { + if COM1.lock().is_some() { + // Hardcoded UART + return; + } + + if let Some((phys, size, skip_init, cts, compatible)) = diag_uart_range(dtb) { + let virt = crate::PHYS_OFFSET + phys; + let serial_opt = if compatible.contains("arm,pl011") { + let mut serial_port = uart_pl011::SerialPort::new(virt, cts); + if !skip_init { + serial_port.init(false); + } + Some(SerialKind::Pl011(serial_port)) + } else if compatible.contains("ns16550a") || compatible.contains("snps,dw-apb-uart") { + //TODO: get actual register size from device tree + let serial_port = uart_16550::SerialPort::>::new(virt); + if !skip_init { + serial_port.init(); + } + Some(SerialKind::Ns16550u32(serial_port)) + } else { + None + }; + match serial_opt { + Some(serial) => { + info!("UART {:?} at {:#X} size {:#X}", compatible, virt, size); + *COM1.lock() = Some(serial); + } + None => { + log::warn!( + "UART {:?} at {:#X} size {:#X}: no driver found", + compatible, + virt, + size + ); + } + } + } +} + +pub unsafe fn init(fdt: &Fdt) { + //TODO: find actual serial device, not just any PL011 + if let Some(node) = fdt.find_compatible(&["arm,pl011"]) { + let irq = get_interrupt(fdt, &node, 0).unwrap(); + if let Some(ic_idx) = ic_for_chip(&fdt, &node) { + let virq = IRQ_CHIP.irq_chip_list.chips[ic_idx] + .ic + .irq_xlate(irq) + .unwrap(); + info!("serial_port virq = {}", virq); + register_irq(virq as u32, Box::new(Com1Irq {})); + IRQ_CHIP.irq_enable(virq as u32); + } else { + error!("serial port irq parent not found"); + } + } + if let Some(ref mut serial_port) = *COM1.lock() { + serial_port.enable_irq(); + } +} diff --git a/src/arch/aarch64/device/uart_pl011.rs b/src/arch/aarch64/device/uart_pl011.rs new file mode 100644 index 00000000..8e6c98a2 --- /dev/null +++ b/src/arch/aarch64/device/uart_pl011.rs @@ -0,0 +1,250 @@ +use core::ptr; + +use crate::scheme::debug::{debug_input, debug_notify}; + +bitflags! 
{ + /// UARTFR + #[derive(Clone, Copy, Debug)] + struct UartFrFlags: u32 { + const TXFE = 1 << 7; + const RXFF = 1 << 6; + const TXFF = 1 << 5; + const RXFE = 1 << 4; + const BUSY = 1 << 3; + } +} + +bitflags! { + /// UARTCR + #[derive(Clone, Copy, Debug)] + struct UartCrFlags: u32 { + const RXE = 1 << 9; + const TXE = 1 << 8; + const UARTEN = 1 << 0; + } +} + +bitflags! { + // UARTIMSC + #[derive(Clone, Copy, Debug)] + struct UartImscFlags: u32 { + const RTIM = 1 << 6; + const TXIM = 1 << 5; + const RXIM = 1 << 4; + } +} + +bitflags! { + // UARTICR + #[derive(Clone, Copy, Debug)] + struct UartIcrFlags: u32 { + const RTIC = 1 << 6; + const TXIC = 1 << 5; + const RXIC = 1 << 4; + } +} + +bitflags! { + // UARTRIS + #[derive(Clone, Copy, Debug)] + struct UartRisFlags: u32 { + const RTIS = 1 << 6; + const TXIS = 1 << 5; + const RXIS = 1 << 4; + } +} + +bitflags! { + //UARTMIS + #[derive(Clone, Copy, Debug)] + struct UartMisFlags: u32 { + const TXMIS = 1 << 5; + const RXMIS = 1 << 4; + } +} + +bitflags! { + //UARTLCR_H + #[derive(Clone, Copy, Debug)] + struct UartLcrhFlags: u32 { + const FEN = 1 << 4; + } +} + +bitflags! { + //UARTIFLS + #[derive(Clone, Copy, Debug)] + struct UartIflsFlags: u32 { + const RX1_8 = 0 << 3; + const RX2_8 = 1 << 3; + const RX4_8 = 2 << 3; + const RX6_8 = 3 << 3; + const RX7_8 = 4 << 3; + const TX1_8 = 0 << 0; + const TX2_8 = 1 << 0; + const TX4_8 = 2 << 0; + const TX6_8 = 3 << 0; + const TX7_8 = 4 << 0; + } +} + +#[allow(dead_code)] +pub struct SerialPort { + base: usize, + data_reg: u8, + rcv_stat_reg: u8, + flag_reg: u8, + int_baud_reg: u8, + frac_baud_reg: u8, + line_ctrl_reg: u8, + ctrl_reg: u8, + ifls_reg: u8, + intr_mask_setclr_reg: u8, + raw_intr_stat_reg: u8, + masked_intr_stat_reg: u8, + intr_clr_reg: u8, + dma_ctrl_reg: u8, + ifls: u32, + fifo_size: u32, + cts_event_walkaround: bool, +} + +impl SerialPort { + pub const fn new(base: usize, cts_event_walkaround: bool) -> SerialPort { + SerialPort { + base: base, + data_reg: 0x00, + rcv_stat_reg: 0x04, + flag_reg: 0x18, + int_baud_reg: 0x24, + frac_baud_reg: 0x28, + line_ctrl_reg: 0x2c, + ctrl_reg: 0x30, + ifls_reg: 0x34, + intr_mask_setclr_reg: 0x38, + raw_intr_stat_reg: 0x3c, + masked_intr_stat_reg: 0x40, + intr_clr_reg: 0x44, + dma_ctrl_reg: 0x48, + ifls: 0x12, // RX4_8 | TX4_8 + fifo_size: 32, + cts_event_walkaround: cts_event_walkaround, + } + } + + pub fn read_reg(&self, register: u8) -> u32 { + unsafe { ptr::read_volatile((self.base + register as usize) as *mut u32) } + } + + pub fn write_reg(&self, register: u8, data: u32) { + unsafe { + ptr::write_volatile((self.base + register as usize) as *mut u32, data); + } + } + + pub fn init(&mut self, with_irq: bool) { + //Disable UART first + self.write_reg(self.ctrl_reg, 0x0); + + //Setup ifls + self.write_reg(self.ifls_reg, self.ifls); + + //Enable FIFO + if self.fifo_size > 1 { + let mut flags = UartLcrhFlags::from_bits_truncate(self.read_reg(self.line_ctrl_reg)); + flags |= UartLcrhFlags::FEN; + self.write_reg(self.line_ctrl_reg, flags.bits()); + } + + // Enable RX, TX, UART + let flags = UartCrFlags::RXE | UartCrFlags::TXE | UartCrFlags::UARTEN; + self.write_reg(self.ctrl_reg, flags.bits()); + + if with_irq { + self.enable_irq(); + } + } + + fn line_sts(&self) -> UartFrFlags { + UartFrFlags::from_bits_truncate(self.read_reg(self.flag_reg)) + } + + fn intr_stats(&self) -> UartRisFlags { + UartRisFlags::from_bits_truncate(self.read_reg(self.raw_intr_stat_reg)) + } + + pub fn drain_fifo(&mut self) { + for _ in 0..self.fifo_size * 2 { + if 
self.line_sts().contains(UartFrFlags::RXFE) { + break; + } + let _ = self.read_reg(self.data_reg); + } + } + + pub fn receive(&mut self) { + let mut flags = self.intr_stats(); + let chk_flags = UartRisFlags::RTIS | UartRisFlags::RXIS; + while (flags & chk_flags).bits() != 0 { + if self.cts_event_walkaround { + self.write_reg(self.intr_clr_reg, 0x00); + let _ = self.read_reg(self.intr_clr_reg); + let _ = self.read_reg(self.intr_clr_reg); + } + + let clr = flags & (!chk_flags); + self.write_reg(self.intr_clr_reg, clr.bits()); + + for _ in 0..256 { + if self.line_sts().contains(UartFrFlags::RXFE) { + break; + } + let c = self.read_reg(self.data_reg) as u8; + if c != 0 { + debug_input(c); + } + } + + flags = self.intr_stats(); + } + debug_notify(); + } + + pub fn send(&mut self, data: u8) { + while !self.line_sts().contains(UartFrFlags::TXFE) {} + self.write_reg(self.data_reg, data as u32); + } + + pub fn clear_all_irqs(&mut self) { + let flags = UartIcrFlags::RTIC | UartIcrFlags::RXIC; + self.write_reg(self.intr_clr_reg, flags.bits()); + } + + pub fn enable_irq(&mut self) { + self.clear_all_irqs(); + + self.drain_fifo(); + + let flags = UartImscFlags::RXIM | UartImscFlags::RTIM; + self.write_reg(self.intr_mask_setclr_reg, flags.bits()); + } + + pub fn write(&mut self, buf: &[u8]) { + for &b in buf { + match b { + 8 | 0x7F => { + self.send(8); + self.send(b' '); + self.send(8); + } + b'\n' => { + self.send(b'\r'); + self.send(b'\n'); + } + _ => { + self.send(b); + } + } + } + } +} diff --git a/src/arch/aarch64/interrupt/exception.rs b/src/arch/aarch64/interrupt/exception.rs new file mode 100644 index 00000000..bd996f79 --- /dev/null +++ b/src/arch/aarch64/interrupt/exception.rs @@ -0,0 +1,220 @@ +use ::syscall::Exception; +use rmm::VirtualAddress; + +use crate::{ + context::signal::excp_handler, + exception_stack, + memory::{ArchIntCtx, GenericPfFlags}, + panic::stack_trace, + syscall::{self, flag::*}, +}; + +use super::InterruptStack; + +exception_stack!(synchronous_exception_at_el1_with_sp0, |stack| { + println!("Synchronous exception at EL1 with SP0"); + stack.dump(); + stack_trace(); + loop {} +}); + +fn exception_code(esr: usize) -> u8 { + ((esr >> 26) & 0x3f) as u8 +} +fn iss(esr: usize) -> u32 { + (esr & 0x01ff_ffff) as u32 +} + +unsafe fn far_el1() -> usize { + let ret: usize; + core::arch::asm!("mrs {}, far_el1", out(reg) ret); + ret +} + +unsafe fn instr_data_abort_inner( + stack: &mut InterruptStack, + from_user: bool, + instr_not_data: bool, + _from: &str, +) -> bool { + let iss = iss(stack.iret.esr_el1); + let fsc = iss & 0x3F; + //dbg!(fsc); + + let was_translation_fault = fsc >= 0b000100 && fsc <= 0b000111; + //let was_permission_fault = fsc >= 0b001101 && fsc <= 0b001111; + let write_not_read_if_data = iss & (1 << 6) != 0; + + let mut flags = GenericPfFlags::empty(); + flags.set(GenericPfFlags::PRESENT, !was_translation_fault); + + // TODO: RMW instructions may "involve" writing to (possibly invalid) memory, but AArch64 + // doesn't appear to require that flag to be set if the read alone would trigger a fault. 
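+    // ISS[6] is the WnR (write-not-read) bit, which is only meaningful for Data Aborts, so it
+    // is ignored for instruction aborts below.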
+ flags.set( + GenericPfFlags::INVOLVED_WRITE, + write_not_read_if_data && !instr_not_data, + ); + flags.set(GenericPfFlags::INSTR_NOT_DATA, instr_not_data); + flags.set(GenericPfFlags::USER_NOT_SUPERVISOR, from_user); + + let faulting_addr = VirtualAddress::new(far_el1()); + //dbg!(faulting_addr, flags, from); + + crate::memory::page_fault_handler(stack, flags, faulting_addr).is_ok() +} + +unsafe fn cntfrq_el0() -> usize { + let ret: usize; + core::arch::asm!("mrs {}, cntfrq_el0", out(reg) ret); + ret +} + +unsafe fn cntpct_el0() -> usize { + let ret: usize; + core::arch::asm!("mrs {}, cntpct_el0", out(reg) ret); + ret +} + +unsafe fn cntvct_el0() -> usize { + let ret: usize; + core::arch::asm!("mrs {}, cntvct_el0", out(reg) ret); + ret +} + +unsafe fn instr_trapped_msr_mrs_inner( + stack: &mut InterruptStack, + _from_user: bool, + _instr_not_data: bool, + _from: &str, +) -> bool { + let iss = iss(stack.iret.esr_el1); + // let res0 = (iss & 0x1C0_0000) >> 22; + let op0 = (iss & 0x030_0000) >> 20; + let op2 = (iss & 0x00e_0000) >> 17; + let op1 = (iss & 0x001_c000) >> 14; + let crn = (iss & 0x000_3c00) >> 10; + let rt = (iss & 0x000_03e0) >> 5; + let crm = (iss & 0x000_001e) >> 1; + let dir = iss & 0x000_0001; + + /* + print!("iss=0x{:x}, res0=0b{:03b}, op0=0b{:02b}\n + op2=0b{:03b}, op1=0b{:03b}, crn=0b{:04b}\n + rt=0b{:05b}, crm=0b{:04b}, dir=0b{:b}\n", + iss, res0, op0, op2, op1, crn, rt, crm, dir); + */ + + match (op0, op1, crn, crm, op2, dir) { + //MRS , CNTFRQ_EL0 + (0b11, 0b011, 0b1110, 0b0000, 0b000, 0b1) => { + let reg_val = cntfrq_el0(); + stack.store_reg(rt as usize, reg_val); + //skip faulting instruction, A64 instructions are always 32-bits + stack.iret.elr_el1 += 4; + return true; + } + //MRS , CNTPCT_EL0 + (0b11, 0b011, 0b1110, 0b0000, 0b001, 0b1) => { + let reg_val = cntpct_el0(); + stack.store_reg(rt as usize, reg_val); + //skip faulting instruction, A64 instructions are always 32-bits + stack.iret.elr_el1 += 4; + return true; + } + //MRS , CNTVCT_EL0 + (0b11, 0b011, 0b1110, 0b0000, 0b010, 0b1) => { + let reg_val = cntvct_el0(); + stack.store_reg(rt as usize, reg_val); + //skip faulting instruction, A64 instructions are always 32-bits + stack.iret.elr_el1 += 4; + return true; + } + _ => {} + } + + false +} + +exception_stack!(synchronous_exception_at_el1_with_spx, |stack| { + if !pf_inner( + stack, + exception_code(stack.iret.esr_el1), + "sync_exc_el1_spx", + ) { + println!("Synchronous exception at EL1 with SPx"); + if exception_code(stack.iret.esr_el1) == 0b100101 { + let far_el1 = far_el1(); + println!("FAR_EL1 = 0x{:08x}", far_el1); + } else if exception_code(stack.iret.esr_el1) == 0b100100 { + let far_el1 = far_el1(); + println!("USER FAR_EL1 = 0x{:08x}", far_el1); + } + stack.dump(); + stack_trace(); + loop {} + } +}); +unsafe fn pf_inner(stack: &mut InterruptStack, ty: u8, from: &str) -> bool { + match ty { + // "Data Abort taken from a lower Exception level" + 0b100100 => instr_data_abort_inner(stack, true, false, from), + // "Data Abort taken without a change in Exception level" + 0b100101 => instr_data_abort_inner(stack, false, false, from), + // "Instruction Abort taken from a lower Exception level" + 0b100000 => instr_data_abort_inner(stack, true, true, from), + // "Instruction Abort taken without a change in Exception level" + 0b100001 => instr_data_abort_inner(stack, false, true, from), + // "Trapped MSR, MRS or System instruction execution in AArch64 state" + 0b011000 => instr_trapped_msr_mrs_inner(stack, true, true, from), + + _ => return false, + } +} + 
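+// EC 0b010101 is "SVC instruction execution in AArch64 state"; the EL0 handler below treats it
+// as a syscall and routes every other exception class through pf_inner, falling back to
+// excp_handler only when the fault cannot be resolved as a page fault.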
+exception_stack!(synchronous_exception_at_el0, |stack| { + match exception_code(stack.iret.esr_el1) { + 0b010101 => { + let scratch = &stack.scratch; + let ret = syscall::syscall( + scratch.x8, scratch.x0, scratch.x1, scratch.x2, scratch.x3, scratch.x4, + ); + stack.scratch.x0 = ret; + } + + ty => { + if !pf_inner(stack, ty as u8, "sync_exc_el0") { + log::error!( + "FATAL: Not an SVC induced synchronous exception (ty={:b})", + ty + ); + println!("FAR_EL1: {:#0x}", far_el1()); + //crate::debugger::debugger(None); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 0, // TODO + }); + } + } + } +}); + +exception_stack!(unhandled_exception, |stack| { + println!("Unhandled exception"); + stack.dump(); + stack_trace(); + loop {} +}); + +impl ArchIntCtx for InterruptStack { + fn ip(&self) -> usize { + self.iret.elr_el1 + } + fn recover_and_efault(&mut self) { + // Set the return value to nonzero to indicate usercopy failure (EFAULT), and emulate the + // return instruction by setting the return pointer to the saved LR value. + + self.iret.elr_el1 = self.preserved.x30; + self.scratch.x0 = 1; + } +} diff --git a/src/arch/aarch64/interrupt/handler.rs b/src/arch/aarch64/interrupt/handler.rs new file mode 100644 index 00000000..c21d0e9f --- /dev/null +++ b/src/arch/aarch64/interrupt/handler.rs @@ -0,0 +1,415 @@ +use crate::syscall::IntRegisters; + +#[derive(Default)] +#[repr(C, packed)] +pub struct ScratchRegisters { + pub x0: usize, + pub x1: usize, + pub x2: usize, + pub x3: usize, + pub x4: usize, + pub x5: usize, + pub x6: usize, + pub x7: usize, + pub x8: usize, + pub x9: usize, + pub x10: usize, + pub x11: usize, + pub x12: usize, + pub x13: usize, + pub x14: usize, + pub x15: usize, + pub x16: usize, + pub x17: usize, + pub x18: usize, + pub _padding: usize, +} + +impl ScratchRegisters { + pub fn a(&self) -> usize { + self.x0 + } + pub fn b(&self) -> usize { + self.x1 + } + pub fn dump(&self) { + println!("X0: {:>016X}", { self.x0 }); + println!("X1: {:>016X}", { self.x1 }); + println!("X2: {:>016X}", { self.x2 }); + println!("X3: {:>016X}", { self.x3 }); + println!("X4: {:>016X}", { self.x4 }); + println!("X5: {:>016X}", { self.x5 }); + println!("X6: {:>016X}", { self.x6 }); + println!("X7: {:>016X}", { self.x7 }); + println!("X8: {:>016X}", { self.x8 }); + println!("X9: {:>016X}", { self.x9 }); + println!("X10: {:>016X}", { self.x10 }); + println!("X11: {:>016X}", { self.x11 }); + println!("X12: {:>016X}", { self.x12 }); + println!("X13: {:>016X}", { self.x13 }); + println!("X14: {:>016X}", { self.x14 }); + println!("X15: {:>016X}", { self.x15 }); + println!("X16: {:>016X}", { self.x16 }); + println!("X17: {:>016X}", { self.x17 }); + println!("X18: {:>016X}", { self.x18 }); + } +} + +#[derive(Default)] +#[repr(C, packed)] +pub struct PreservedRegisters { + //TODO: is X30 a preserved register? 
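+    // Per AAPCS64, x19..x29 are callee-saved; x30 is the link register (return address) and is
+    // saved alongside them here.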
+ pub x19: usize, + pub x20: usize, + pub x21: usize, + pub x22: usize, + pub x23: usize, + pub x24: usize, + pub x25: usize, + pub x26: usize, + pub x27: usize, + pub x28: usize, + pub x29: usize, + pub x30: usize, +} + +impl PreservedRegisters { + pub fn dump(&self) { + println!("X19: {:>016X}", { self.x19 }); + println!("X20: {:>016X}", { self.x20 }); + println!("X21: {:>016X}", { self.x21 }); + println!("X22: {:>016X}", { self.x22 }); + println!("X23: {:>016X}", { self.x23 }); + println!("X24: {:>016X}", { self.x24 }); + println!("X25: {:>016X}", { self.x25 }); + println!("X26: {:>016X}", { self.x26 }); + println!("X27: {:>016X}", { self.x27 }); + println!("X28: {:>016X}", { self.x28 }); + println!("X29: {:>016X}", { self.x29 }); + println!("X30: {:>016X}", { self.x30 }); + } +} + +#[derive(Default)] +#[repr(C, packed)] +pub struct IretRegisters { + // occurred + // The exception vector disambiguates at which EL the interrupt + pub sp_el0: usize, // Shouldn't be used if interrupt occurred at EL1 + pub esr_el1: usize, + pub spsr_el1: usize, + pub elr_el1: usize, +} + +impl IretRegisters { + pub fn dump(&self) { + println!("ELR_EL1: {:>016X}", { self.elr_el1 }); + println!("SPSR_EL1: {:>016X}", { self.spsr_el1 }); + println!("ESR_EL1: {:>016X}", { self.esr_el1 }); + println!("SP_EL0: {:>016X}", { self.sp_el0 }); + } +} + +#[derive(Default)] +#[repr(C, packed)] +pub struct InterruptStack { + pub iret: IretRegisters, + pub scratch: ScratchRegisters, + pub preserved: PreservedRegisters, +} + +impl InterruptStack { + pub fn init(&mut self) {} + pub fn set_stack_pointer(&mut self, sp: usize) { + self.iret.sp_el0 = sp; + } + pub fn sig_archdep_reg(&self) -> usize { + self.scratch.x0 + } + pub fn set_instr_pointer(&mut self, ip: usize) { + self.iret.elr_el1 = ip; + } + pub fn instr_pointer(&self) -> usize { + self.iret.elr_el1 + } + pub fn dump(&self) { + self.iret.dump(); + self.scratch.dump(); + self.preserved.dump(); + } + + /// Saves all registers to a struct used by the proc: + /// scheme to read/write registers. + pub fn save(&self, all: &mut IntRegisters) { + /*TODO: aarch64 registers + all.elr_el1 = self.iret.elr_el1; + all.spsr_el1 = self.iret.spsr_el1; + all.esr_el1 = self.iret.esr_el1; + all.sp_el0 = self.iret.sp_el0; + all.padding = 0; + */ + all.x30 = self.preserved.x30; + all.x29 = self.preserved.x29; + all.x28 = self.preserved.x28; + all.x27 = self.preserved.x27; + all.x26 = self.preserved.x26; + all.x25 = self.preserved.x25; + all.x24 = self.preserved.x24; + all.x23 = self.preserved.x23; + all.x22 = self.preserved.x22; + all.x21 = self.preserved.x21; + all.x20 = self.preserved.x20; + all.x19 = self.preserved.x19; + all.x18 = self.scratch.x18; + all.x17 = self.scratch.x17; + all.x16 = self.scratch.x16; + all.x15 = self.scratch.x15; + all.x14 = self.scratch.x14; + all.x13 = self.scratch.x13; + all.x12 = self.scratch.x12; + all.x11 = self.scratch.x11; + all.x10 = self.scratch.x10; + all.x9 = self.scratch.x9; + all.x8 = self.scratch.x8; + all.x7 = self.scratch.x7; + all.x6 = self.scratch.x6; + all.x5 = self.scratch.x5; + all.x4 = self.scratch.x4; + all.x3 = self.scratch.x3; + all.x2 = self.scratch.x2; + all.x1 = self.scratch.x1; + all.x0 = self.scratch.x0; + } + + /// Loads all registers from a struct used by the proc: + /// scheme to read/write registers. 
+ pub fn load(&mut self, all: &IntRegisters) { + /*TODO: aarch64 registers + self.iret.elr_el1 = all.elr_el1; + self.iret.spsr_el1 = all.spsr_el1; + self.iret.esr_el1 = all.esr_el1; + self.iret.sp_el0 = all.sp_el0; + */ + self.preserved.x30 = all.x30; + self.preserved.x29 = all.x29; + self.preserved.x28 = all.x28; + self.preserved.x27 = all.x27; + self.preserved.x26 = all.x26; + self.preserved.x25 = all.x25; + self.preserved.x24 = all.x24; + self.preserved.x23 = all.x23; + self.preserved.x22 = all.x22; + self.preserved.x21 = all.x21; + self.preserved.x20 = all.x20; + self.preserved.x19 = all.x19; + self.scratch.x18 = all.x18; + self.scratch.x17 = all.x17; + self.scratch.x16 = all.x16; + self.scratch.x15 = all.x15; + self.scratch.x14 = all.x14; + self.scratch.x13 = all.x13; + self.scratch.x12 = all.x12; + self.scratch.x11 = all.x11; + self.scratch.x10 = all.x10; + self.scratch.x9 = all.x9; + self.scratch.x8 = all.x8; + self.scratch.x7 = all.x7; + self.scratch.x6 = all.x6; + self.scratch.x5 = all.x5; + self.scratch.x4 = all.x4; + self.scratch.x3 = all.x3; + self.scratch.x2 = all.x2; + self.scratch.x1 = all.x1; + self.scratch.x0 = all.x0; + } + + /// Store a specific generic registers + pub fn store_reg(&mut self, idx: usize, val: usize) { + match idx { + 0 => self.scratch.x0 = val, + 1 => self.scratch.x1 = val, + 2 => self.scratch.x2 = val, + 3 => self.scratch.x3 = val, + 4 => self.scratch.x4 = val, + 5 => self.scratch.x5 = val, + 6 => self.scratch.x6 = val, + 7 => self.scratch.x7 = val, + 8 => self.scratch.x8 = val, + 9 => self.scratch.x9 = val, + 10 => self.scratch.x10 = val, + 11 => self.scratch.x11 = val, + 12 => self.scratch.x12 = val, + 13 => self.scratch.x13 = val, + 14 => self.scratch.x14 = val, + 15 => self.scratch.x15 = val, + 16 => self.scratch.x16 = val, + 17 => self.scratch.x17 = val, + 18 => self.scratch.x18 = val, + 19 => self.preserved.x19 = val, + 20 => self.preserved.x20 = val, + 21 => self.preserved.x21 = val, + 22 => self.preserved.x22 = val, + 23 => self.preserved.x23 = val, + 24 => self.preserved.x24 = val, + 25 => self.preserved.x25 = val, + 26 => self.preserved.x26 = val, + 27 => self.preserved.x27 = val, + 28 => self.preserved.x28 = val, + 29 => self.preserved.x29 = val, + 30 => self.preserved.x30 = val, + _ => {} + } + } + + //TODO + pub fn set_singlestep(&mut self, _singlestep: bool) {} +} + +#[macro_export] +macro_rules! aarch64_asm { + ($($strings:expr,)+) => { + core::arch::global_asm!(concat!( + $($strings),+, + )); + }; +} + +#[macro_export] +macro_rules! push_scratch { + () => { + " + // Push scratch registers + str x18, [sp, #-16]! + stp x16, x17, [sp, #-16]! + stp x14, x15, [sp, #-16]! + stp x12, x13, [sp, #-16]! + stp x10, x11, [sp, #-16]! + stp x8, x9, [sp, #-16]! + stp x6, x7, [sp, #-16]! + stp x4, x5, [sp, #-16]! + stp x2, x3, [sp, #-16]! + stp x0, x1, [sp, #-16]! + " + }; +} + +#[macro_export] +macro_rules! pop_scratch { + () => { + " + // Pop scratch registers + ldp x0, x1, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x4, x5, [sp], #16 + ldp x6, x7, [sp], #16 + ldp x8, x9, [sp], #16 + ldp x10, x11, [sp], #16 + ldp x12, x13, [sp], #16 + ldp x14, x15, [sp], #16 + ldp x16, x17, [sp], #16 + ldr x18, [sp], #16 + " + }; +} + +#[macro_export] +macro_rules! push_preserved { + () => { + " + // Push preserved registers + stp x29, x30, [sp, #-16]! + stp x27, x28, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x19, x20, [sp, #-16]! + " + }; +} + +#[macro_export] +macro_rules! 
pop_preserved { + () => { + " + // Pop preserved registers + ldp x19, x20, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x25, x26, [sp], #16 + ldp x27, x28, [sp], #16 + ldp x29, x30, [sp], #16 + " + }; +} + +#[macro_export] +macro_rules! push_special { + () => { + " + mrs x14, spsr_el1 + mrs x15, elr_el1 + stp x14, x15, [sp, #-16]! + + mrs x14, sp_el0 + mrs x15, esr_el1 + stp x14, x15, [sp, #-16]! + " + }; +} + +#[macro_export] +macro_rules! pop_special { + () => { + " + ldp x14, x15, [sp], 16 + msr esr_el1, x15 + msr sp_el0, x14 + + ldp x14, x15, [sp], 16 + msr elr_el1, x15 + msr spsr_el1, x14 + " + }; +} + +#[macro_export] +macro_rules! exception_stack { + ($name:ident, |$stack:ident| $code:block) => { + #[naked] + #[no_mangle] + pub unsafe extern "C" fn $name(stack: &mut $crate::arch::aarch64::interrupt::InterruptStack) { + unsafe extern "C" fn inner($stack: &mut $crate::arch::aarch64::interrupt::InterruptStack) { + $code + } + core::arch::naked_asm!(concat!( + // Backup all userspace registers to stack + push_preserved!(), + push_scratch!(), + push_special!(), + + // Call inner function with pointer to stack + "mov x29, sp\n", + "mov x0, sp\n", + "bl {}", + + // Restore all userspace registers + pop_special!(), + pop_scratch!(), + pop_preserved!(), + + "eret\n", + ), sym inner); + } + }; +} +#[naked] +pub unsafe extern "C" fn enter_usermode() -> ! { + core::arch::naked_asm!(concat!( + "blr x28\n", + // Restore all userspace registers + pop_special!(), + pop_scratch!(), + pop_preserved!(), + "eret\n", + )); +} diff --git a/src/arch/aarch64/interrupt/irq.rs b/src/arch/aarch64/interrupt/irq.rs new file mode 100644 index 00000000..6a52e755 --- /dev/null +++ b/src/arch/aarch64/interrupt/irq.rs @@ -0,0 +1,62 @@ +use crate::{arch::device::ROOT_IC_IDX, dtb::irqchip::IRQ_CHIP}; +use core::sync::atomic::Ordering; + +#[cfg(feature = "sys_stat")] +use crate::percpu::PercpuBlock; + +unsafe fn irq_ack() -> (u32, Option) { + let ic = &mut IRQ_CHIP.irq_chip_list.chips[ROOT_IC_IDX.load(Ordering::Relaxed)].ic; + let irq = ic.irq_ack(); + (irq, ic.irq_to_virq(irq)) +} + +exception_stack!(irq_at_el0, |_stack| { + let (irq, virq) = irq_ack(); + if let Some(virq) = virq + && virq < 1024 + { + IRQ_CHIP.trigger_virq(virq as u32); + } else { + println!("unexpected irq num {}", irq); + } +}); + +exception_stack!(irq_at_el1, |_stack| { + let (irq, virq) = irq_ack(); + if let Some(virq) = virq + && virq < 1024 + { + IRQ_CHIP.trigger_virq(virq as u32); + } else { + println!("unexpected irq num {}", irq); + } +}); + +//TODO +pub unsafe fn trigger(irq: u32) { + #[cfg(feature = "sys_stat")] + PercpuBlock::current().stats.add_irq(irq); + + extern "C" { + fn irq_trigger(irq: u32); + } + + irq_trigger(irq); + IRQ_CHIP.irq_eoi(irq); +} + +/* +pub unsafe fn irq_handler_gentimer(irq: u32) { + GENTIMER.clear_irq(); + { + *time::OFFSET.lock() += GENTIMER.clk_freq as u128; + } + + timeout::trigger(); + + context::switch::tick(); + + trigger(irq); + GENTIMER.reload_count(); +} +*/ diff --git a/src/arch/aarch64/interrupt/mod.rs b/src/arch/aarch64/interrupt/mod.rs new file mode 100644 index 00000000..d5a26c8b --- /dev/null +++ b/src/arch/aarch64/interrupt/mod.rs @@ -0,0 +1,50 @@ +//! 
Interrupt instructions + +use core::arch::asm; + +#[macro_use] +pub mod handler; + +pub mod exception; +pub mod irq; +pub mod syscall; +pub mod trace; + +pub use self::handler::InterruptStack; + +/// Clear interrupts +#[inline(always)] +pub unsafe fn disable() { + asm!("msr daifset, #2"); +} + +/// Set interrupts and halt +/// This will atomically wait for the next interrupt +/// Performing enable followed by halt is not guaranteed to be atomic, use this instead! +#[inline(always)] +pub unsafe fn enable_and_halt() { + asm!("msr daifclr, #2"); + asm!("wfi"); +} + +/// Set interrupts and nop +/// This will enable interrupts and allow the IF flag to be processed +/// Simply enabling interrupts does not gurantee that they will trigger, use this instead! +#[inline(always)] +pub unsafe fn enable_and_nop() { + asm!("msr daifclr, #2"); + asm!("nop"); +} + +/// Halt instruction +#[inline(always)] +pub unsafe fn halt() { + asm!("wfi"); +} + +/// Pause instruction +/// Safe because it is similar to a NOP, and has no memory effects +#[inline(always)] +pub fn pause() { + unsafe { asm!("nop") }; +} diff --git a/src/arch/aarch64/interrupt/syscall.rs b/src/arch/aarch64/interrupt/syscall.rs new file mode 100644 index 00000000..40bb7e16 --- /dev/null +++ b/src/arch/aarch64/interrupt/syscall.rs @@ -0,0 +1,49 @@ +#[no_mangle] +pub unsafe extern "C" fn do_exception_unhandled() {} + +#[no_mangle] +pub unsafe extern "C" fn do_exception_synchronous() {} + +#[allow(dead_code)] +#[repr(C, packed)] +pub struct SyscallStack { + pub elr_el1: usize, + pub padding: usize, + pub tpidr: usize, + pub tpidrro: usize, + pub rflags: usize, + pub esr: usize, + pub sp: usize, + pub lr: usize, + pub fp: usize, + pub x28: usize, + pub x27: usize, + pub x26: usize, + pub x25: usize, + pub x24: usize, + pub x23: usize, + pub x22: usize, + pub x21: usize, + pub x20: usize, + pub x19: usize, + pub x18: usize, + pub x17: usize, + pub x16: usize, + pub x15: usize, + pub x14: usize, + pub x13: usize, + pub x12: usize, + pub x11: usize, + pub x10: usize, + pub x9: usize, + pub x8: usize, + pub x7: usize, + pub x6: usize, + pub x5: usize, + pub x4: usize, + pub x3: usize, + pub x2: usize, + pub x1: usize, + pub x0: usize, +} +pub use super::handler::enter_usermode; diff --git a/src/arch/aarch64/interrupt/trace.rs b/src/arch/aarch64/interrupt/trace.rs new file mode 100644 index 00000000..b5ddb71a --- /dev/null +++ b/src/arch/aarch64/interrupt/trace.rs @@ -0,0 +1,28 @@ +use core::{arch::asm, mem}; + +pub struct StackTrace { + pub fp: usize, + pub pc_ptr: *const usize, +} + +impl StackTrace { + #[inline(always)] + pub unsafe fn start() -> Option { + let fp: usize; + asm!("mov {}, fp", out(reg) fp); + let pc_ptr = fp.checked_add(mem::size_of::())?; + Some(StackTrace { + fp, + pc_ptr: pc_ptr as *const usize, + }) + } + + pub unsafe fn next(self) -> Option { + let fp = *(self.fp as *const usize); + let pc_ptr = fp.checked_add(mem::size_of::())?; + Some(StackTrace { + fp: fp, + pc_ptr: pc_ptr as *const usize, + }) + } +} diff --git a/src/arch/aarch64/ipi.rs b/src/arch/aarch64/ipi.rs new file mode 100644 index 00000000..97905807 --- /dev/null +++ b/src/arch/aarch64/ipi.rs @@ -0,0 +1,28 @@ +#[derive(Clone, Copy, Debug)] +#[repr(u8)] +pub enum IpiKind { + Wakeup = 0x40, + Tlb = 0x41, +} + +#[derive(Clone, Copy, Debug)] +#[repr(u8)] +pub enum IpiTarget { + Other = 3, +} + +#[cfg(not(feature = "multi_core"))] +#[inline(always)] +pub fn ipi(_kind: IpiKind, _target: IpiTarget) {} + +#[cfg(feature = "multi_core")] +#[inline(always)] +pub fn ipi(_kind: 
IpiKind, _target: IpiTarget) {} + +#[cfg(not(feature = "multi_core"))] +#[inline(always)] +pub fn ipi_single(_kind: IpiKind, _target: crate::cpu_set::LogicalCpuId) {} + +#[cfg(feature = "multi_core")] +#[inline(always)] +pub fn ipi_single(_kind: IpiKind, _target: crate::cpu_set::LogicalCpuId) {} diff --git a/src/arch/aarch64/macros.rs b/src/arch/aarch64/macros.rs new file mode 100644 index 00000000..4e3566fc --- /dev/null +++ b/src/arch/aarch64/macros.rs @@ -0,0 +1,16 @@ +/// Print to console +#[macro_export] +macro_rules! print { + ($($arg:tt)*) => ({ + use core::fmt::Write; + let _ = write!($crate::arch::debug::Writer::new(), $($arg)*); + }); +} + +/// Print with new line to console +#[macro_export] +macro_rules! println { + () => (print!("\n")); + ($fmt:expr) => (print!(concat!($fmt, "\n"))); + ($fmt:expr, $($arg:tt)*) => (print!(concat!($fmt, "\n"), $($arg)*)); +} diff --git a/src/arch/aarch64/misc.rs b/src/arch/aarch64/misc.rs new file mode 100644 index 00000000..8620f7e9 --- /dev/null +++ b/src/arch/aarch64/misc.rs @@ -0,0 +1,21 @@ +use crate::{ + cpu_set::LogicalCpuId, + paging::{RmmA, RmmArch}, + percpu::PercpuBlock, +}; + +impl PercpuBlock { + pub fn current() -> &'static Self { + unsafe { &*(crate::device::cpu::registers::control_regs::tpidr_el1() as *const Self) } + } +} + +#[cold] +pub unsafe fn init(cpu_id: LogicalCpuId) { + let frame = crate::memory::allocate_frame().expect("failed to allocate percpu memory"); + let virt = RmmA::phys_to_virt(frame.base()).data() as *mut PercpuBlock; + + virt.write(PercpuBlock::init(cpu_id)); + + crate::device::cpu::registers::control_regs::tpidr_el1_write(virt as u64); +} diff --git a/src/arch/aarch64/mod.rs b/src/arch/aarch64/mod.rs new file mode 100644 index 00000000..16714c18 --- /dev/null +++ b/src/arch/aarch64/mod.rs @@ -0,0 +1,73 @@ +#[macro_use] +pub mod macros; + +/// Constants like memory locations +pub mod consts; + +/// Debugging support +pub mod debug; + +/// Devices +pub mod device; + +/// Interrupt instructions +pub mod interrupt; + +/// Inter-processor interrupts +pub mod ipi; + +/// Miscellaneous +pub mod misc; + +/// Paging +pub mod paging; + +pub mod rmm; + +/// Initialization and start function +pub mod start; + +/// Stop function +pub mod stop; + +// Interrupt vectors +pub mod vectors; + +pub mod time; + +pub use ::rmm::AArch64Arch as CurrentRmmArch; + +pub use arch_copy_to_user as arch_copy_from_user; + +#[naked] +#[link_section = ".usercopy-fns"] +pub unsafe extern "C" fn arch_copy_to_user(dst: usize, src: usize, len: usize) -> u8 { + // x0, x1, x2 + core::arch::naked_asm!( + " + mov x4, x0 + mov x0, 0 + 2: + cmp x2, 0 + b.eq 3f + + ldrb w3, [x1] + strb w3, [x4] + + add x4, x4, 1 + add x1, x1, 1 + sub x2, x2, 1 + + b 2b + 3: + ret + " + ); +} + +pub const KFX_SIZE: usize = 1024; + +// This function exists as the KFX size is dynamic on x86_64. +pub fn kfx_size() -> usize { + KFX_SIZE +} diff --git a/src/arch/aarch64/paging/entry.rs b/src/arch/aarch64/paging/entry.rs new file mode 100644 index 00000000..63c78e5d --- /dev/null +++ b/src/arch/aarch64/paging/entry.rs @@ -0,0 +1,9 @@ +//! # Page table entry +//! Some code borrowed from [Phil Opp's Blog](http://os.phil-opp.com/modifying-page-tables.html) + +bitflags! 
{ + pub struct EntryFlags: usize { + const NO_CACHE = 1 << 2; + const DEV_MEM = 2 << 2; + } +} diff --git a/src/arch/aarch64/paging/mapper.rs b/src/arch/aarch64/paging/mapper.rs new file mode 100644 index 00000000..73dd4893 --- /dev/null +++ b/src/arch/aarch64/paging/mapper.rs @@ -0,0 +1,22 @@ +use crate::ipi::{ipi, IpiKind, IpiTarget}; + +use super::RmmA; + +pub use rmm::{Flusher, PageFlush, PageFlushAll}; + +pub struct InactiveFlusher { + _inner: (), +} +impl Flusher for InactiveFlusher { + fn consume(&mut self, flush: PageFlush) { + // TODO: Push to TLB "mailbox" or tell it to reload CR3 if there are too many entries. + unsafe { + flush.ignore(); + } + } +} +impl Drop for InactiveFlusher { + fn drop(&mut self) { + ipi(IpiKind::Tlb, IpiTarget::Other); + } +} diff --git a/src/arch/aarch64/paging/mod.rs b/src/arch/aarch64/paging/mod.rs new file mode 100644 index 00000000..09906ae6 --- /dev/null +++ b/src/arch/aarch64/paging/mod.rs @@ -0,0 +1,100 @@ +//! # Paging +//! Some code was borrowed from [Phil Opp's Blog](http://os.phil-opp.com/modifying-page-tables.html) + +use crate::device::cpu::registers::control_regs; + +pub use super::CurrentRmmArch as RmmA; +pub use rmm::{Arch as RmmArch, PageFlags, PhysicalAddress, TableKind, VirtualAddress}; + +pub type PageMapper = rmm::PageMapper; + +pub mod entry; +pub mod mapper; + +/// Size of pages +pub const PAGE_SIZE: usize = RmmA::PAGE_SIZE; +pub const PAGE_MASK: usize = RmmA::PAGE_OFFSET_MASK; + +/// Setup Memory Access Indirection Register +#[cold] +unsafe fn init_mair() { + let mut val: control_regs::MairEl1 = control_regs::mair_el1(); + + val.insert(control_regs::MairEl1::DEVICE_MEMORY); + val.insert(control_regs::MairEl1::NORMAL_UNCACHED_MEMORY); + val.insert(control_regs::MairEl1::NORMAL_WRITEBACK_MEMORY); + + control_regs::mair_el1_write(val); +} + +/// Initialize MAIR +#[cold] +pub unsafe fn init() { + init_mair(); +} + +/// Page +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Page { + number: usize, +} + +impl Page { + pub fn start_address(self) -> VirtualAddress { + VirtualAddress::new(self.number * PAGE_SIZE) + } + + pub fn containing_address(address: VirtualAddress) -> Page { + //TODO assert!(address.data() < 0x0000_8000_0000_0000 || address.data() >= 0xffff_8000_0000_0000, + // "invalid address: 0x{:x}", address.data()); + Page { + number: address.data() / PAGE_SIZE, + } + } + + pub fn range_inclusive(start: Page, r#final: Page) -> PageIter { + PageIter { + start, + end: r#final.next(), + } + } + pub fn next(self) -> Page { + self.next_by(1) + } + pub fn next_by(self, n: usize) -> Page { + Self { + number: self.number + n, + } + } + pub fn offset_from(self, other: Self) -> usize { + self.number - other.number + } +} + +pub struct PageIter { + start: Page, + end: Page, +} + +impl Iterator for PageIter { + type Item = Page; + + fn next(&mut self) -> Option { + if self.start < self.end { + let page = self.start; + self.start = self.start.next(); + Some(page) + } else { + None + } + } +} + +/// Round down to the nearest multiple of page size +pub fn round_down_pages(number: usize) -> usize { + number - number % PAGE_SIZE +} +/// Round up to the nearest multiple of page size +pub fn round_up_pages(number: usize) -> usize { + round_down_pages(number + PAGE_SIZE - 1) +} diff --git a/src/arch/aarch64/rmm.rs b/src/arch/aarch64/rmm.rs new file mode 100644 index 00000000..453382c4 --- /dev/null +++ b/src/arch/aarch64/rmm.rs @@ -0,0 +1,17 @@ +use rmm::{Arch, PageFlags, VirtualAddress}; + +pub unsafe fn page_flags(virt: 
VirtualAddress) -> PageFlags { + use crate::kernel_executable_offsets::*; + let virt_addr = virt.data(); + + if virt_addr >= __text_start() && virt_addr < __text_end() { + // Remap text read-only, execute + PageFlags::new().execute(true) + } else if virt_addr >= __rodata_start() && virt_addr < __rodata_end() { + // Remap rodata read-only, no execute + PageFlags::new() + } else { + // Remap everything else read-write, no execute + PageFlags::new().write(true) + } +} diff --git a/src/arch/aarch64/start.rs b/src/arch/aarch64/start.rs new file mode 100644 index 00000000..ec0641bc --- /dev/null +++ b/src/arch/aarch64/start.rs @@ -0,0 +1,258 @@ +/// This function is where the kernel sets up IRQ handlers +/// It is increcibly unsafe, and should be minimal in nature +/// It must create the IDT with the correct entries, those entries are +/// defined in other files inside of the `arch` module +use core::slice; +use core::sync::atomic::{AtomicBool, AtomicU32, AtomicUsize, Ordering}; + +#[cfg(feature = "graphical_debug")] +use crate::devices::graphical_debug; + +use fdt::Fdt; +use log::info; + +use crate::{ + allocator, device, dtb, + dtb::register_dev_memory_ranges, + paging, + startup::memory::{register_bootloader_areas, register_memory_region, BootloaderMemoryKind}, +}; + +/// Test of zero values in BSS. +static mut BSS_TEST_ZERO: usize = 0; +/// Test of non-zero values in data. +static mut DATA_TEST_NONZERO: usize = 0xFFFF_FFFF_FFFF_FFFF; + +pub static KERNEL_BASE: AtomicUsize = AtomicUsize::new(0); +pub static KERNEL_SIZE: AtomicUsize = AtomicUsize::new(0); +pub static CPU_COUNT: AtomicU32 = AtomicU32::new(0); +pub static AP_READY: AtomicBool = AtomicBool::new(false); +static BSP_READY: AtomicBool = AtomicBool::new(false); + +#[derive(Debug)] +#[repr(C, packed(8))] +pub struct KernelArgs { + kernel_base: usize, + kernel_size: usize, + stack_base: usize, + stack_size: usize, + env_base: usize, + env_size: usize, + hwdesc_base: usize, + hwdesc_size: usize, + areas_base: usize, + areas_size: usize, + + /// The physical base 64-bit pointer to the contiguous bootstrap/initfs. + bootstrap_base: usize, + /// Size of contiguous bootstrap/initfs physical region, not necessarily page aligned. + bootstrap_size: usize, +} + +/// The entry to Rust, all things must be initialized +#[no_mangle] +pub unsafe extern "C" fn kstart(args_ptr: *const KernelArgs) -> ! { + let bootstrap = { + let args = args_ptr.read(); + + // BSS should already be zero + { + assert_eq!(BSS_TEST_ZERO, 0); + assert_eq!(DATA_TEST_NONZERO, 0xFFFF_FFFF_FFFF_FFFF); + } + + KERNEL_BASE.store(args.kernel_base, Ordering::SeqCst); + KERNEL_SIZE.store(args.kernel_size, Ordering::SeqCst); + + // Convert env to slice + let env = slice::from_raw_parts( + (crate::PHYS_OFFSET + args.env_base) as *const u8, + args.env_size, + ); + + // Set up graphical debug + #[cfg(feature = "graphical_debug")] + graphical_debug::init(env); + + // Get hardware descriptor data + //TODO: use env {DTB,RSDT}_{BASE,SIZE}? 
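+        // The hardware descriptor region holds either a flattened device tree or an ACPI RSDP;
+        // the Fdt parse and the "RSD PTR " signature check below determine which one was passed.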
+ let hwdesc_data = if args.hwdesc_base != 0 { + Some(unsafe { + slice::from_raw_parts( + (crate::PHYS_OFFSET + args.hwdesc_base) as *const u8, + args.hwdesc_size, + ) + }) + } else { + None + }; + + let dtb_res = hwdesc_data + .ok_or(fdt::FdtError::BadPtr) + .and_then(|data| Fdt::new(data)); + + let rsdp_opt = hwdesc_data.and_then(|data| { + if data.starts_with(b"RSD PTR ") { + Some(data.as_ptr()) + } else { + None + } + }); + + // Try to find serial port prior to logging + if let Ok(dtb) = &dtb_res { + device::serial::init_early(dtb); + } + + // Initialize logger + crate::log::init_logger(|r| { + use core::fmt::Write; + let _ = write!( + crate::debug::Writer::new(), + "{}:{} -- {}\n", + r.target(), + r.level(), + r.args() + ); + }); + log::set_max_level(::log::LevelFilter::Debug); + + info!("Redox OS starting..."); + info!( + "Kernel: {:X}:{:X}", + { args.kernel_base }, + args.kernel_base + args.kernel_size + ); + info!( + "Stack: {:X}:{:X}", + { args.stack_base }, + args.stack_base + args.stack_size + ); + info!( + "Env: {:X}:{:X}", + { args.env_base }, + args.env_base + args.env_size + ); + info!( + "HWDESC: {:X}:{:X}", + { args.hwdesc_base }, + args.hwdesc_base + args.hwdesc_size + ); + info!( + "Areas: {:X}:{:X}", + { args.areas_base }, + args.areas_base + args.areas_size + ); + info!( + "Bootstrap: {:X}:{:X}", + { args.bootstrap_base }, + args.bootstrap_base + args.bootstrap_size + ); + + // Setup interrupt handlers + core::arch::asm!( + " + ldr {tmp}, =exception_vector_base + msr vbar_el1, {tmp} + ", + tmp = out(reg) _, + ); + + // Initialize RMM + register_bootloader_areas(args.areas_base, args.areas_size); + if let Ok(dtb) = &dtb_res { + register_dev_memory_ranges(dtb); + } + + register_memory_region( + args.kernel_base, + args.kernel_size, + BootloaderMemoryKind::Kernel, + ); + register_memory_region( + args.stack_base, + args.stack_size, + BootloaderMemoryKind::IdentityMap, + ); + register_memory_region( + args.env_base, + args.env_size, + BootloaderMemoryKind::IdentityMap, + ); + register_memory_region( + args.hwdesc_base, + args.hwdesc_size, + BootloaderMemoryKind::IdentityMap, + ); + register_memory_region( + args.bootstrap_base, + args.bootstrap_size, + BootloaderMemoryKind::IdentityMap, + ); + crate::startup::memory::init(None, None); + + // Initialize paging + paging::init(); + + crate::misc::init(crate::cpu_set::LogicalCpuId::new(0)); + + // Reset AP variables + CPU_COUNT.store(1, Ordering::SeqCst); + AP_READY.store(false, Ordering::SeqCst); + BSP_READY.store(false, Ordering::SeqCst); + + // Setup kernel heap + allocator::init(); + + // Set up double buffer for graphical debug now that heap is available + #[cfg(feature = "graphical_debug")] + graphical_debug::init_heap(); + + // Activate memory logging + crate::log::init(); + + // Initialize devices + match dtb_res { + Ok(dtb) => { + dtb::init(hwdesc_data.map(|slice| (slice.as_ptr() as usize, slice.len()))); + device::init_devicetree(&dtb); + } + Err(err) => { + dtb::init(None); + log::warn!("failed to parse DTB: {}", err); + + #[cfg(feature = "acpi")] + { + crate::acpi::init(rsdp_opt); + } + } + } + + BSP_READY.store(true, Ordering::SeqCst); + + crate::Bootstrap { + base: crate::memory::Frame::containing(crate::paging::PhysicalAddress::new( + args.bootstrap_base, + )), + page_count: args.bootstrap_size / crate::memory::PAGE_SIZE, + env, + } + }; + + crate::kmain(CPU_COUNT.load(Ordering::SeqCst), bootstrap); +} + +#[repr(C, packed)] +#[allow(unused)] +pub struct KernelArgsAp { + cpu_id: u64, + page_table: u64, + 
stack_start: u64, + stack_end: u64, +} + +/// Entry to rust for an AP +#[allow(unused)] +pub unsafe extern "C" fn kstart_ap(_args_ptr: *const KernelArgsAp) -> ! { + loop {} +} diff --git a/src/arch/aarch64/stop.rs b/src/arch/aarch64/stop.rs new file mode 100644 index 00000000..1734e3d3 --- /dev/null +++ b/src/arch/aarch64/stop.rs @@ -0,0 +1,26 @@ +use core::arch::asm; + +pub unsafe fn kreset() -> ! { + println!("kreset"); + + asm!("hvc #0", + in("x0") 0x8400_0009_usize, + options(noreturn), + ) +} + +pub unsafe fn emergency_reset() -> ! { + asm!("hvc #0", + in("x0") 0x8400_0009_usize, + options(noreturn), + ) +} + +pub unsafe fn kstop() -> ! { + println!("kstop"); + + asm!("hvc #0", + in("x0") 0x8400_0008_usize, + options(noreturn), + ) +} diff --git a/src/arch/aarch64/time.rs b/src/arch/aarch64/time.rs new file mode 100644 index 00000000..54e2354b --- /dev/null +++ b/src/arch/aarch64/time.rs @@ -0,0 +1,11 @@ +use crate::time::NANOS_PER_SEC; + +pub fn monotonic_absolute() -> u128 { + //TODO: aarch64 generic timer counter + let ticks: usize; + unsafe { core::arch::asm!("mrs {}, cntpct_el0", out(reg) ticks) }; + let freq: usize; + unsafe { core::arch::asm!("mrs {}, cntfrq_el0", out(reg) freq) }; + + ticks as u128 * NANOS_PER_SEC / freq as u128 +} diff --git a/src/arch/aarch64/vectors.rs b/src/arch/aarch64/vectors.rs new file mode 100644 index 00000000..2b61f9d9 --- /dev/null +++ b/src/arch/aarch64/vectors.rs @@ -0,0 +1,112 @@ +core::arch::global_asm!( + " + // Exception vector stubs + // + // Unhandled exceptions spin in a wfi loop for the moment + // This can be macro-ified + +.globl exception_vector_base + + .align 11 +exception_vector_base: + + // Synchronous + .align 7 +__vec_00: + b synchronous_exception_at_el1_with_sp0 + b __vec_00 + + // IRQ + .align 7 +__vec_01: + b irq_at_el1 + b __vec_01 + + // FIQ + .align 7 +__vec_02: + b unhandled_exception + b __vec_02 + + // SError + .align 7 +__vec_03: + b unhandled_exception + b __vec_03 + + // Synchronous + .align 7 +__vec_04: + b synchronous_exception_at_el1_with_spx + b __vec_04 + + // IRQ + .align 7 +__vec_05: + b irq_at_el1 + b __vec_05 + + // FIQ + .align 7 +__vec_06: + b unhandled_exception + b __vec_06 + + // SError + .align 7 +__vec_07: + b unhandled_exception + b __vec_07 + + // Synchronous + .align 7 +__vec_08: + b synchronous_exception_at_el0 + b __vec_08 + + // IRQ + .align 7 +__vec_09: + b irq_at_el0 + b __vec_09 + + // FIQ + .align 7 +__vec_10: + b unhandled_exception + b __vec_10 + + // SError + .align 7 +__vec_11: + b unhandled_exception + b __vec_11 + + // Synchronous + .align 7 +__vec_12: + b unhandled_exception + b __vec_12 + + // IRQ + .align 7 +__vec_13: + b unhandled_exception + b __vec_13 + + // FIQ + .align 7 +__vec_14: + b unhandled_exception + b __vec_14 + + // SError + .align 7 +__vec_15: + b unhandled_exception + b __vec_15 + + .align 7 +exception_vector_end: +" +); diff --git a/src/arch/mod.rs b/src/arch/mod.rs index 1abbd033..2aae6399 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -1,5 +1,27 @@ +#[cfg(target_arch = "aarch64")] +#[macro_use] +pub mod aarch64; +#[cfg(target_arch = "aarch64")] +pub use self::aarch64::*; + +#[cfg(target_arch = "x86")] +#[macro_use] +pub mod x86; +#[cfg(target_arch = "x86")] +pub use self::x86::*; + #[cfg(target_arch = "x86_64")] #[macro_use] pub mod x86_64; #[cfg(target_arch = "x86_64")] -pub use self::x86_64::*; \ No newline at end of file +pub use self::x86_64::*; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[macro_use] +mod x86_shared; + 
+#[cfg(target_arch = "riscv64")] +#[macro_use] +pub mod riscv64; +#[cfg(target_arch = "riscv64")] +pub use self::riscv64::*; diff --git a/src/arch/riscv64/consts.rs b/src/arch/riscv64/consts.rs new file mode 100644 index 00000000..95b3d9a4 --- /dev/null +++ b/src/arch/riscv64/consts.rs @@ -0,0 +1,42 @@ +#![allow(unused)] + +use super::CurrentRmmArch; +use rmm::Arch; + +const PML4_SHIFT: usize = (CurrentRmmArch::PAGE_LEVELS - 1) * CurrentRmmArch::PAGE_ENTRY_SHIFT + + CurrentRmmArch::PAGE_SHIFT; +/// The size of a single PML4 +pub const PML4_SIZE: usize = 1_usize << PML4_SHIFT; +pub const PML4_MASK: usize = CurrentRmmArch::PAGE_ENTRY_MASK << PML4_SHIFT; + +/// Offset of recursive paging (deprecated, but still reserved) +pub const RECURSIVE_PAGE_OFFSET: usize = (-(PML4_SIZE as isize)) as usize; +pub const RECURSIVE_PAGE_PTE3: usize = (RECURSIVE_PAGE_OFFSET & PML4_MASK) / PML4_SIZE; + +/// Offset of kernel +pub const KERNEL_OFFSET: usize = RECURSIVE_PAGE_OFFSET - PML4_SIZE; +pub const KERNEL_PTE3: usize = (KERNEL_OFFSET & PML4_MASK) / PML4_SIZE; + +/// Offset to kernel heap +pub const KERNEL_HEAP_OFFSET: usize = KERNEL_OFFSET - PML4_SIZE; +pub const KERNEL_HEAP_PTE3: usize = (KERNEL_HEAP_OFFSET & PML4_MASK) / PML4_SIZE; +/// Size of kernel heap +pub const KERNEL_HEAP_SIZE: usize = 1 * 1024 * 1024; // 1 MB + +/// Offset of temporary mapping for misc kernel bring-up actions +pub const KERNEL_TMP_MISC_OFFSET: usize = KERNEL_HEAP_OFFSET - PML4_SIZE; + +/// Offset to kernel percpu variables +pub const KERNEL_PERCPU_OFFSET: usize = KERNEL_TMP_MISC_OFFSET - PML4_SIZE; +pub const KERNEL_PERCPU_PML4: usize = (KERNEL_PERCPU_OFFSET & PML4_MASK) / PML4_SIZE; +/// Size of kernel percpu variables +pub const KERNEL_PERCPU_SHIFT: u8 = 16; // 2^16 = 64 KiB +pub const KERNEL_PERCPU_SIZE: usize = 1_usize << KERNEL_PERCPU_SHIFT; + +/// Offset of physmap +// This needs to match RMM's PHYS_OFFSET +pub const PHYS_OFFSET: usize = (-1_isize << (CurrentRmmArch::PAGE_ADDRESS_SHIFT - 1)) as usize; +pub const PHYS_PML4: usize = (PHYS_OFFSET & PML4_MASK) / PML4_SIZE; + +/// End offset of the user image, i.e. 
kernel start +pub const USER_END_OFFSET: usize = 1_usize << (CurrentRmmArch::PAGE_ADDRESS_SHIFT - 1); diff --git a/src/arch/riscv64/debug.rs b/src/arch/riscv64/debug.rs new file mode 100644 index 00000000..0e6ea549 --- /dev/null +++ b/src/arch/riscv64/debug.rs @@ -0,0 +1,58 @@ +use crate::log::{Log, LOG}; +use core::fmt; +use spin::MutexGuard; + +#[cfg(feature = "serial_debug")] +use super::device::serial::{SerialPort, COM1}; + +#[cfg(feature = "graphical_debug")] +use crate::devices::graphical_debug::{DebugDisplay, DEBUG_DISPLAY}; + +pub struct Writer<'a> { + log: MutexGuard<'a, Option>, + #[cfg(feature = "serial_debug")] + serial: MutexGuard<'a, Option>, + #[cfg(feature = "graphical_debug")] + display: MutexGuard<'a, Option>, +} + +impl<'a> Writer<'a> { + pub fn new() -> Writer<'a> { + Writer { + log: LOG.lock(), + #[cfg(feature = "graphical_debug")] + display: DEBUG_DISPLAY.lock(), + #[cfg(feature = "serial_debug")] + serial: COM1.lock(), + } + } + + pub fn write(&mut self, buf: &[u8], preserve: bool) { + if preserve { + if let Some(ref mut log) = *self.log { + log.write(buf); + } + } + + #[cfg(feature = "graphical_debug")] + { + if let Some(ref mut display) = *self.display { + let _ = display.write(buf); + } + } + + #[cfg(feature = "serial_debug")] + { + if let Some(ref mut serial) = *self.serial { + serial.write(buf); + } + } + } +} + +impl<'a> fmt::Write for Writer<'a> { + fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> { + self.write(s.as_bytes(), true); + Ok(()) + } +} diff --git a/src/arch/riscv64/device/cpu/mod.rs b/src/arch/riscv64/device/cpu/mod.rs new file mode 100644 index 00000000..23103f1b --- /dev/null +++ b/src/arch/riscv64/device/cpu/mod.rs @@ -0,0 +1,5 @@ +use core::fmt::{Result, Write}; + +pub fn cpu_info(_w: &mut W) -> Result { + unimplemented!() +} diff --git a/src/arch/riscv64/device/irqchip/clint.rs b/src/arch/riscv64/device/irqchip/clint.rs new file mode 100644 index 00000000..560d86df --- /dev/null +++ b/src/arch/riscv64/device/irqchip/clint.rs @@ -0,0 +1,42 @@ +use spin::Mutex; +use syscall::{Io, Mmio}; +use crate::context::switch::tick; + +#[repr(packed(4))] +#[repr(C)] +struct ClintRegs { + /// per-hart MSIP registers + /// bit 0: trigger IPI for the hart + msip: [Mmio; 4095], // +0000 -- 3fff + _rsrv1: u32, + /// per-hart MTIMECMP registers + /// timer interrupt trigger threshold + mtimecmp: [Mmio; 4095], // +4000 - bff7 + mtime: Mmio // current time +} + +pub struct Clint { + regs: &'static mut ClintRegs, + freq: u64 +} + +pub static CLINT: Mutex> = Mutex::new(None); + +impl Clint { + pub fn new(addr: *mut u8, size: usize, freq: usize) -> Self { + assert!(size >= core::mem::size_of::()); + Self { + regs: unsafe { (addr as *mut ClintRegs).as_mut().unwrap() }, + freq: freq as u64 + } + } + + pub fn init(self: &mut Self) { + (*self.regs).mtimecmp[0].write((*self.regs).mtime.read() + self.freq / 100); + } + + pub fn timer_irq(self: &mut Self, hart: usize) { + (*self.regs).mtimecmp[hart].write((*self.regs).mtimecmp[hart].read() + self.freq / 100); + tick(); + } +} diff --git a/src/arch/riscv64/device/irqchip/clint_sbi.rs b/src/arch/riscv64/device/irqchip/clint_sbi.rs new file mode 100644 index 00000000..57aab226 --- /dev/null +++ b/src/arch/riscv64/device/irqchip/clint_sbi.rs @@ -0,0 +1,148 @@ +use crate::{ + context, + context::timeout, + dtb::irqchip::{register_irq, InterruptHandler, IrqCell, IRQ_CHIP}, +}; +use alloc::{boxed::Box, vec::Vec}; +use byteorder::{ByteOrder, BE}; +use core::{arch::asm, cmp::max}; +use fdt::node::FdtNode; +use spin::Mutex; 
+// This is a Core-Local Interruptor (CLINT). A single device directly routed into each HLIC +// It is responsible for local timer and IPI interrupts +// An example DTS: +// /soc/ +// clint@2000000/ +// interrupts-extended = <&hlic0 3>, <&hlic0 7>, <&hlic1 3>, <&hlic1 7>, +// <&hlic2 3>, <&hlic2 7>, <&hlic3 3>, <&hlic3 7>; +// reg = <0x200000000 0x10000>; +// compatible = "sifive,clint0", "riscv,clint0"; + +pub struct Clint { + freq: u64, + next_event: Vec, +} + +pub static CLINT: Mutex> = Mutex::new(None); +const TICKS_PER_SECOND: u64 = 100; +const IRQ_IPI: usize = 0; +const IRQ_TIMER: usize = 1; + +struct ClintConnector { + hart_id: usize, + irq: usize, +} + +impl InterruptHandler for ClintConnector { + fn irq_handler(&mut self, _irq: u32) { + CLINT + .lock() + .as_mut() + .unwrap() + .irq_handler(self.hart_id, self.irq); + if self.irq == IRQ_TIMER { + // a bit of hack, but it is a really bad idea to call scheduler + // from inside clint irq handler + timeout::trigger(); + context::switch::tick(); + } + } +} + +fn map_interrupt(irq: u32) -> u32 { + match irq { + 3 => 1, // map M-mode IPI to S-mode IPI + 7 => 5, // map M-mode timer to S-mode timer + x => x, + } +} + +impl Clint { + pub fn new(freq: usize, node: &FdtNode) -> Self { + // TODO IPI + // let reg = clint_node.reg().unwrap().next().unwrap(); + // reg.starting_address.add(crate::PHYS_OFFSET) as *mut u8; + // reg.size.unwrap(); + + let mut me = Self { + freq: freq as u64, + next_event: Vec::new(), + }; + let mut interrupts = node + .property("interrupts-extended") + .unwrap() + .value + .chunks(4) + .map(|x| BE::read_u32(x)); + let mut hart_id = 0; + while let Ok([phandle1, irq0, phandle2, irq1]) = interrupts.next_chunk::<4>() { + assert_eq!( + phandle1, phandle2, + "Invalid interrupts-extended property for CLINT" + ); + let hlic = unsafe { + IRQ_CHIP + .irq_chip_list + .chips + .iter() + .find(|x| x.phandle == phandle1) + .expect("Couldn't find HLIC in irqchip list for CLINT") + }; + + // FIXME dirty hack map M-mode interrupts (handled by SBI) to S-mode interrupts we get from SBI + // Why aren't S-mode interrupts in the DTB already? 
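+            // (Standard RISC-V interrupt causes: 3/7 are the M-mode software/timer interrupts,
+            // 1/5 their S-mode counterparts; map_interrupt translates the former to the latter.)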
+ let irq0 = IrqCell::L1(map_interrupt(irq0)); + let irq1 = IrqCell::L1(map_interrupt(irq1)); + + let virq0 = hlic + .ic + .irq_xlate(irq0) + .expect("Couldn't get virq 0 from HLIC"); + let virq1 = hlic + .ic + .irq_xlate(irq1) + .expect("Couldn't get virq 1 from HLIC"); + register_irq(virq0 as u32, Box::new(ClintConnector { hart_id, irq: 0 })); + register_irq(virq1 as u32, Box::new(ClintConnector { hart_id, irq: 1 })); + hart_id += 1; + } + me.next_event.resize_with(hart_id, || 0); + me + } + + pub(crate) fn irq_handler(self: &mut Self, hart_id: usize, irq: usize) { + match irq { + IRQ_IPI => { + println!("IPI interrupt at {}", hart_id); + } + IRQ_TIMER => { + let mtime: usize; + unsafe { + asm!( + "rdtime t0", + lateout("t0") mtime + ) + }; + + self.next_event[hart_id] = + max(self.next_event[hart_id], mtime as u64) + self.freq / TICKS_PER_SECOND; + sbi_rt::set_timer(self.next_event[hart_id]).expect("SBI timer cannot be set!"); + } + _ => { + panic!("Unexpected CLINT irq") + } + } + } + + pub fn init(self: &mut Self, hart: usize) { + let mtime: usize; + unsafe { + asm!( + "rdtime t0", + lateout("t0") mtime + ) + }; + self.next_event[hart] = mtime as u64 + (self.freq / TICKS_PER_SECOND); + sbi_rt::set_timer(self.next_event[hart]).expect("SBI timer cannot be set!"); + } +} diff --git a/src/arch/riscv64/device/irqchip/hlic.rs b/src/arch/riscv64/device/irqchip/hlic.rs new file mode 100644 index 00000000..68021065 --- /dev/null +++ b/src/arch/riscv64/device/irqchip/hlic.rs @@ -0,0 +1,159 @@ +use crate::dtb::irqchip::{InterruptController, InterruptHandler, IrqCell, IrqDesc, IRQ_CHIP}; +use alloc::vec::Vec; +use core::arch::asm; +use fdt::{node::NodeProperty, Fdt}; +use syscall::{Error, EINVAL}; + +// This is a hart-local interrupt controller, a root of irqchip tree +// An example DTS: +// /cpus/ +// cpu@1/ +// interrupt-controller/ +// #interrupt-cells = 0x00000001 +// interrupt-controller = +// compatible = "riscv,cpu-intc" +// phandle = 0x00000006 + +fn acknowledge(interrupt: usize) { + unsafe { + asm!( + "csrc sip, t0", + in("t0") 1usize << interrupt, + options(nostack) + ) + } +} + +pub unsafe fn interrupt(hart: usize, interrupt: usize) { + assert!( + hart < CPU_INTERRUPT_HANDLERS.len(), + "Unexpected hart in interrupt routine" + ); + acknowledge(interrupt); + let ic_idx = CPU_INTERRUPT_HANDLERS[hart].unwrap_or_else(|| { + panic!( + "No hlic connected to hart {} yet interrupt {} occurred", + hart, interrupt + ) + }); + let virq = IRQ_CHIP + .irq_to_virq(ic_idx, interrupt as u32) + .unwrap_or_else(|| panic!("HLIC doesn't know of interrupt {}", interrupt)); + if let Some(handler) = &mut IRQ_CHIP.irq_desc[virq].handler { + handler.irq_handler(virq as u32); + } else if let Some(ic_idx) = IRQ_CHIP.irq_desc[virq].basic.child_ic_idx { + IRQ_CHIP.irq_chip_list.chips[ic_idx] + .ic + .irq_handler(virq as u32); + } else { + panic!( + "Unconnected interrupt {} occurred on hlic connected to hart {}", + interrupt, hart + ); + } +} + +pub fn init() { + unsafe { + asm!( + "csrs sie, t0", + in("t0") (0xFFFF), + options(nostack) + ) + } +} + +static mut CPU_INTERRUPT_HANDLERS: Vec> = Vec::new(); + +pub struct Hlic { + virq_base: usize, +} + +impl Hlic { + pub(crate) fn new() -> Self { + return Self { virq_base: 0 }; + } +} +impl InterruptHandler for Hlic { + fn irq_handler(&mut self, irq: u32) { + assert!(irq < 16, "Unsupported HLIC interrupt raised!"); + unsafe { + IRQ_CHIP.trigger_virq(self.virq_base as u32 + irq); + } + } +} + +impl InterruptController for Hlic { + fn irq_init( + &mut self, + fdt_opt: 
Option<&Fdt>, + irq_desc: &mut [IrqDesc; 1024], + ic_idx: usize, + irq_idx: &mut usize, + ) -> syscall::Result<()> { + let desc = unsafe { &IRQ_CHIP.irq_chip_list.chips[ic_idx] }; + let fdt = fdt_opt.unwrap(); + let cpu_node = fdt + .find_all_nodes("/cpus/cpu") + .find(|x| { + x.children().any(|x| { + x.property("phandle").and_then(NodeProperty::as_usize) + == Some(desc.phandle as usize) + }) + }) + .expect("Could not find CPU node for HLIC controller"); + let hart = cpu_node.property("reg").unwrap().as_usize().unwrap(); + unsafe { + if CPU_INTERRUPT_HANDLERS.len() <= hart { + CPU_INTERRUPT_HANDLERS.resize(hart + 1, None); + } + assert!( + CPU_INTERRUPT_HANDLERS[hart].replace(ic_idx).is_none(), + "Conflicting HLIC interrupt handler found" + ); + } + self.virq_base = *irq_idx; + for i in 0..16 { + irq_desc[self.virq_base + i].basic.ic_idx = ic_idx; + irq_desc[self.virq_base + i].basic.ic_irq = i as u32; + } + *irq_idx += 16; + Ok(()) + } + + fn irq_ack(&mut self) -> u32 { + panic!("Cannot ack HLIC interrupt"); + } + + fn irq_eoi(&mut self, _irq_num: u32) {} + + fn irq_enable(&mut self, _irq_num: u32) { + // This would require IPI to a correct core + // Not bothering with this, all interrupts are enabled at all times + } + + fn irq_disable(&mut self, _irq_num: u32) { + // This would require IPI to a correct core + // Not bothering with this, all interrupts are enabled at all times + } + + fn irq_xlate(&self, irq_data: IrqCell) -> syscall::Result { + match irq_data { + IrqCell::L1(irq) if irq <= 0xF => Ok(self.virq_base + irq as usize), + _ => Err(Error::new(EINVAL)), + } + } + + fn irq_to_virq(&self, hwirq: u32) -> Option { + if hwirq > 0 && hwirq <= 0xF { + Some(self.virq_base + hwirq as usize) + } else { + None + } + } +} + +pub fn irqchip_for_hart(hart: usize) -> Option { + let value = unsafe { CPU_INTERRUPT_HANDLERS.get(hart) }?; + *value +} diff --git a/src/arch/riscv64/device/irqchip/mod.rs b/src/arch/riscv64/device/irqchip/mod.rs new file mode 100644 index 00000000..310e4287 --- /dev/null +++ b/src/arch/riscv64/device/irqchip/mod.rs @@ -0,0 +1,44 @@ +use self::clint::Clint; +use crate::dtb::irqchip::InterruptController; +use alloc::boxed::Box; +use fdt::Fdt; + +pub(crate) mod hlic; +mod plic; + +#[path = "clint_sbi.rs"] +mod clint; + +// pub mod clint; // actual clint.rs off limits if SBI is present + +pub fn new_irqchip(ic_str: &str) -> Option> { + if ic_str.contains("riscv,cpu-intc") { + Some(Box::new(hlic::Hlic::new())) + } else if ic_str.contains("riscv,plic0") || ic_str.contains("sifive,plic-1.0.0") { + Some(Box::new(plic::Plic::new())) + } else { + log::warn!("no driver for interrupt controller {:?}", ic_str); + None + } +} + +pub unsafe fn init_clint(fdt: &Fdt) { + let cpus = fdt.find_node("/cpus").unwrap(); + let clock_freq = cpus + .property("timebase-frequency") + .unwrap() + .as_usize() + .unwrap(); + + let clint_node = fdt.find_node("/soc/clint").unwrap(); + assert!(clint_node + .compatible() + .unwrap() + .all() + .find(|x| ((*x).eq("riscv,clint0"))) + .is_some()); + + let clint = Clint::new(clock_freq, &clint_node); + *clint::CLINT.lock() = Some(clint); + clint::CLINT.lock().as_mut().unwrap().init(0); +} diff --git a/src/arch/riscv64/device/irqchip/plic.rs b/src/arch/riscv64/device/irqchip/plic.rs new file mode 100644 index 00000000..242e392a --- /dev/null +++ b/src/arch/riscv64/device/irqchip/plic.rs @@ -0,0 +1,198 @@ +use crate::{ + arch::{device::irqchip::hlic, start::BOOT_HART_ID}, + dtb::{ + get_mmio_address, + irqchip::{InterruptController, InterruptHandler, 
IrqCell, IrqDesc, IRQ_CHIP}, + }, +}; +use core::{mem, num::NonZero, sync::atomic::Ordering}; +use fdt::Fdt; +use log::{error, info}; +use syscall::{Error, Io, Mmio, EINVAL}; + +#[repr(packed(4))] +#[repr(C)] +struct InterruptThresholdRegs { + threshold: Mmio, + claim_complete: Mmio, + _rsrv: [u32; 1022], +} + +static MAX_CONTEXTS: usize = 64; + +#[repr(packed(4))] +#[repr(C)] +struct PlicRegs { + /// source priorities + source_priority: [Mmio; 1024], // +0000 -- 0fff + // pending interrupts + pending: [Mmio; 1024], // +1000 -- 1fff + // per-context interrupt enable + enable: [[Mmio; 32]; 16320], // +2000 - 1f'ffff + // per-context priority threshold and acknowledge + thresholds: [InterruptThresholdRegs; 64], // specced at +20'0000 - 0fff'ffff for 15872 contexts + // but actual memory allotted in firmware is much lower +} + +const _: () = assert!(0x1000 == mem::offset_of!(PlicRegs, pending)); +const _: () = assert!(0x2000 == mem::offset_of!(PlicRegs, enable)); +const _: () = assert!(0x20_0000 == mem::offset_of!(PlicRegs, thresholds)); +const _: () = assert!(0x1000 == mem::size_of::()); + +impl PlicRegs { + pub fn set_priority(self: &mut Self, irq: usize, priority: usize) { + assert!(irq > 0 && irq <= 1023 && priority < 8); + self.source_priority[irq].write(priority as u32); + } + + pub fn pending(self: &Self, irq_lane: usize) -> u32 { + assert!(irq_lane < 32); + self.pending[irq_lane].read() + } + + pub fn enable(self: &mut Self, context: usize, irq: NonZero, enable: bool) { + assert!(irq.get() <= 1023 && context < MAX_CONTEXTS); + let irq_lane = irq.get() / 32; + let irq = irq.get() % 32; + self.enable[context][irq_lane].writef(1u32 << irq, enable); + } + + pub fn set_priority_threshold(self: &mut Self, context: usize, priority: usize) { + assert!(context < MAX_CONTEXTS && priority <= 7); + self.thresholds[context].threshold.write(priority as u32); + } + + pub fn claim(self: &mut Self, context: usize) -> Option> { + assert!(context < MAX_CONTEXTS); + let claim = self.thresholds[context].claim_complete.read(); + NonZero::new(claim as usize) + } + + pub fn complete(self: &mut Self, context: usize, claim: NonZero) { + assert!(context < MAX_CONTEXTS); + self.thresholds[context] + .claim_complete + .write(claim.get() as u32); + } +} + +pub struct Plic { + regs: *mut PlicRegs, + ndev: usize, + virq_base: usize, + context: usize, +} + +impl Plic { + pub fn new() -> Self { + Self { + regs: 0 as *mut PlicRegs, + ndev: 0, + virq_base: 0, + context: 0, + } + } +} +impl InterruptHandler for Plic { + fn irq_handler(&mut self, _irq: u32) { + unsafe { + let irq = self.irq_ack(); + //println!("PLIC interrupt {}", irq); + if let Some(virq) = self.irq_to_virq(irq) { + IRQ_CHIP.trigger_virq(virq as u32); + } else { + error!("unexpected irq num {}", irq); + self.irq_eoi(irq); + } + } + //println!("PLIC interrupt done"); + } +} + +impl InterruptController for Plic { + fn irq_init( + &mut self, + fdt_opt: Option<&Fdt>, + irq_desc: &mut [IrqDesc; 1024], + ic_idx: usize, + irq_idx: &mut usize, + ) -> syscall::Result<()> { + let desc = unsafe { &IRQ_CHIP.irq_chip_list.chips[ic_idx] }; + let fdt = fdt_opt.unwrap(); + let my_node = fdt.find_phandle(desc.phandle).unwrap(); + + // MMIO region + let reg = my_node.reg().unwrap().next().unwrap(); + let addr = get_mmio_address(&fdt, &my_node, ®).unwrap(); + // Specifies how many external interrupts are supported by this controller. 
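+        // ("riscv,ndev" counts the external interrupt sources; PLIC source 0 is
+        // reserved, so the valid hardware interrupt IDs are 1..=ndev, matching the
+        // asserts in irq_enable/irq_disable below.)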
+ let ndev = my_node + .property("riscv,ndev") + .and_then(|x| x.as_usize()) + .unwrap(); + + self.regs = (addr + crate::PHYS_OFFSET) as *mut PlicRegs; + self.ndev = ndev; + + self.virq_base = *irq_idx; + for i in 0..ndev { + irq_desc[self.virq_base + i].basic.ic_idx = ic_idx; + irq_desc[self.virq_base + i].basic.ic_irq = i as u32; + } + *irq_idx += ndev; + + // route all interrupts to boot HART + // TODO spread irqs over all the cores when we have them? + let hlic_ic_idx = hlic::irqchip_for_hart(BOOT_HART_ID.load(Ordering::Relaxed)) + .expect("Could not find HLIC irqchip for the boot hart while initing PLIC"); + self.context = desc + .parents + .iter() + .position(|x| x.parent_interrupt.is_some() && x.parent == hlic_ic_idx) + .unwrap(); + info!("PLIC: using context {}", self.context); + + let regs = unsafe { self.regs.as_mut().unwrap() }; + regs.set_priority_threshold(self.context, 0); + + Ok(()) + } + + fn irq_ack(&mut self) -> u32 { + let regs = unsafe { self.regs.as_mut().unwrap() }; + regs.claim(self.context).unwrap().get() as u32 + } + + fn irq_eoi(&mut self, irq_num: u32) { + let regs = unsafe { self.regs.as_mut().unwrap() }; + regs.complete(self.context, NonZero::new(irq_num as usize).unwrap()); + } + + fn irq_enable(&mut self, irq_num: u32) { + assert!(irq_num > 0 && irq_num as usize <= self.ndev); + let regs = unsafe { self.regs.as_mut().unwrap() }; + regs.set_priority(irq_num as usize, 1); + regs.enable(self.context, NonZero::new(irq_num as usize).unwrap(), true); + } + + fn irq_disable(&mut self, irq_num: u32) { + assert!(irq_num > 0 && irq_num as usize <= self.ndev); + let regs = unsafe { self.regs.as_mut().unwrap() }; + regs.set_priority(irq_num as usize, 1); + regs.enable(self.context, NonZero::new(irq_num as usize).unwrap(), false); + } + + fn irq_xlate(&self, irq_data: IrqCell) -> syscall::Result { + match irq_data { + IrqCell::L1(irq) => Ok(self.virq_base + irq as usize), + _ => Err(Error::new(EINVAL)), + } + } + + fn irq_to_virq(&self, hwirq: u32) -> Option { + if (hwirq as usize) < self.ndev { + Some(self.virq_base + hwirq as usize) + } else { + None + } + } +} diff --git a/src/arch/riscv64/device/mod.rs b/src/arch/riscv64/device/mod.rs new file mode 100644 index 00000000..de2fa976 --- /dev/null +++ b/src/arch/riscv64/device/mod.rs @@ -0,0 +1,101 @@ +use crate::{ + arch::{device::irqchip::hlic, time}, + dtb::DTB_BINARY, +}; +use fdt::{ + node::{FdtNode, NodeProperty}, + Fdt, +}; + +pub mod cpu; +pub(crate) mod irqchip; +pub mod serial; + +use crate::arch::device::irqchip::init_clint; + +fn string_property(name: &str) -> bool { + name == "compatible" + || name == "model" + || name == "device_type" + || name == "status" + || name == "riscv,isa-base" + || name == "riscv,isa" + || name == "mmu-type" + || name == "stdout-path" +} + +fn print_property(prop: &NodeProperty, n_spaces: usize) { + (0..n_spaces).for_each(|_| print!(" ")); + print!("{} =", prop.name); + if string_property(prop.name) + && let Some(str) = prop.as_str() + { + println!(" \"{}\"", str); + } else if let Some(value) = prop.as_usize() { + println!(" 0x{:08x}", value); + } else { + for v in prop.value { + print!(" {:02x}", v); + } + println!(); + } +} +fn print_node(node: &FdtNode<'_, '_>, n_spaces: usize) { + (0..n_spaces).for_each(|_| print!(" ")); + println!("{}/", node.name); + for prop in node.properties() { + print_property(&prop, n_spaces + 4); + } + + for child in node.children() { + print_node(&child, n_spaces + 4); + } +} + +pub(crate) fn dump_fdt(fdt: &Fdt) { + if let Some(root) = 
fdt.find_node("/") { + print_node(&root, 0); + } +} + +unsafe fn init_intc(cpu: &FdtNode) { + let intc_node = cpu + .children() + .find(|x| x.name == "interrupt-controller") + .unwrap(); + assert_eq!(intc_node.compatible().unwrap().first(), "riscv,cpu-intc"); + // This controller is hardwired into interrupt handler code and has no Mmios + hlic::init(); // enable interrupts at HLIC level +} + +pub unsafe fn init() { + let data = DTB_BINARY.get().unwrap(); + let fdt = Fdt::new(data).unwrap(); + + crate::dtb::irqchip::init(&fdt); + + let cpu = fdt.find_node(format!("/cpus/cpu@{}", 0).as_str()).unwrap(); + init_intc(&cpu); + init_time(&fdt); +} + +fn init_time(fdt: &Fdt) { + let cpus = fdt.find_node("/cpus").unwrap(); + let clock_freq = cpus + .property("timebase-frequency") + .unwrap() + .as_usize() + .unwrap(); + time::init(clock_freq); +} + +pub unsafe fn init_noncore() { + let data = DTB_BINARY.get().unwrap(); + let fdt = Fdt::new(data).unwrap(); + + init_clint(&fdt); + serial::init(&fdt); +} + +#[derive(Default)] +pub struct ArchPercpuMisc; diff --git a/src/arch/riscv64/device/serial.rs b/src/arch/riscv64/device/serial.rs new file mode 100644 index 00000000..b74e1a50 --- /dev/null +++ b/src/arch/riscv64/device/serial.rs @@ -0,0 +1,93 @@ +use alloc::boxed::Box; +use fdt::Fdt; +use log::info; +use spin::Mutex; +use syscall::Mmio; + +use crate::{ + devices::uart_16550, + dtb::{ + diag_uart_range, get_interrupt, interrupt_parent, + irqchip::{register_irq, InterruptHandler, IRQ_CHIP}, + }, + scheme::{ + debug::{debug_input, debug_notify}, + irq::irq_trigger, + }, +}; + +pub struct SerialPort { + inner: &'static mut uart_16550::SerialPort>, +} +impl SerialPort { + pub fn write(&mut self, buf: &[u8]) { + self.inner.write(buf) + } + pub fn receive(&mut self) { + while let Some(c) = self.inner.receive() { + debug_input(c); + } + debug_notify(); + } +} + +pub static COM1: Mutex> = Mutex::new(None); + +pub struct Com1Irq {} + +impl InterruptHandler for Com1Irq { + fn irq_handler(&mut self, irq: u32) { + if let Some(ref mut serial_port) = *COM1.lock() { + serial_port.receive(); + }; + unsafe { + irq_trigger(irq as u8); + IRQ_CHIP.irq_eoi(irq); + } + } +} + +pub unsafe fn init_early(dtb: &Fdt) { + if COM1.lock().is_some() { + // Hardcoded UART + return; + } + + if let Some((phys, _, _, _, compatible)) = diag_uart_range(dtb) { + let virt = crate::PHYS_OFFSET + phys; + let port = if compatible == "ns16550a" { + let serial_port = uart_16550::SerialPort::>::new(virt); + serial_port.init(); + Some(SerialPort { inner: serial_port }) + } else { + None + }; + match port { + Some(port) => { + *COM1.lock() = Some(port); + } + None => {} + } + } +} + +pub unsafe fn init(fdt: &Fdt) -> Option<()> { + if let Some(node) = fdt.find_compatible(&["ns16550a"]) { + let intr = get_interrupt(fdt, &node, 0).unwrap(); + let interrupt_parent = interrupt_parent(fdt, &node)?; + let phandle = interrupt_parent.property("phandle")?.as_usize()? as u32; + let ic_idx = IRQ_CHIP.phandle_to_ic_idx(phandle)?; + + let virq = IRQ_CHIP.irq_chip_list.chips[ic_idx] + .ic + .irq_xlate(intr) + .unwrap(); + info!("serial_port virq = {}", virq); + register_irq(virq as u32, Box::new(Com1Irq {})); + IRQ_CHIP.irq_enable(virq as u32); + } + if let Some(ref mut _serial_port) = *COM1.lock() { + // serial_port.enable_irq(); // FIXME receive int is enabled by default in 16550. Disable by default? 
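+        // (The PLIC side is already wired up above via register_irq and
+        // IRQ_CHIP.irq_enable; the FIXME only concerns the UART's own
+        // interrupt-enable register.)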
+ } + Some(()) +} diff --git a/src/arch/riscv64/interrupt/exception.rs b/src/arch/riscv64/interrupt/exception.rs new file mode 100644 index 00000000..56495197 --- /dev/null +++ b/src/arch/riscv64/interrupt/exception.rs @@ -0,0 +1,214 @@ +use ::syscall::Exception; +use core::{arch::global_asm, sync::atomic::Ordering}; +use log::{error, info}; +use rmm::VirtualAddress; + +use crate::{ + arch::{device::irqchip, start::BOOT_HART_ID}, + context::signal::excp_handler, + memory::GenericPfFlags, + panic::stack_trace, + ptrace, + syscall::{self, flag::*}, +}; + +const BREAKPOINT: usize = 3; +const USERMODE_ECALL: usize = 8; +const INSTRUCTION_PAGE_FAULT: usize = 12; +const LOAD_PAGE_FAULT: usize = 13; +const STORE_PAGE_FAULT: usize = 15; + +use super::InterruptStack; + +global_asm!(concat!( + ".global exception_handler\n", + ".p2align 3\n", +"exception_handler:\n", + "csrrw tp, sscratch, tp\n", + "beq tp, x0, 3f\n", // exception before percpu data is available; got to be S mode + + "sd t0, 0(tp)\n", + "csrr t0, sstatus\n", + "andi t0, t0, 1<<8\n",// SPP bit + "bne t0, x0, 2f\n", + + // trap/interrupt from U mode, switch stacks + "ld t0, 0(tp)\n", + "sd sp, 0(tp)\n", + "ld sp, 8(tp)\n", + + push_registers!(), + "ld t0, 0(tp)\n", + "sd t0, (1 * 8)(sp)\n", // save original SP + "csrrw t0, sscratch, tp\n", + "sd t0, (3 * 8)(sp)\n", // save original TP, and restore sscratch to handle double faults + + "mv a0, sp\n", + "jal {0}\n", + + // save S mode stack to percpu + "addi t0, sp, 32 * 8\n", + "sd t0, 8(tp)\n", + "li t0, 1 << 8\n", // return to U mode (sstatus might've been modified by nested trap or context switch) + "csrc sstatus, t0\n", + "j 4f\n", + +"2: ld t0, 0(tp)\n", // S-mode +"3:\n", // S mode early + + "addi sp, sp, -2 * 8\n", // fake stack frame for the stack tracer + + push_registers!(), + + "addi t1, sp, 34 * 8\n", + "sd t1, (1 * 8)(sp)\n", // save original SP + "csrrw t1, sscratch, tp\n", + "sd t1, (3 * 8)(sp)\n", // save original TP, and restore sscratch to handle double faults + + "sd t0, (33 * 8)(sp)\n", // fill the stack frame. t0 holds original pc after push_registers + "sd fp, (32 * 8)(sp)\n", + "addi fp, sp, 34 * 8\n", + + "mv a0, sp\n", + "jal {0}\n", + // return to S mode with interrupts disabled + // (sstatus might've been modified by nested trap or context switch) + "li t0, 1 << 8\n", + "csrs sstatus, t0\n", + "li t0, 1 << 5\n", + "csrc sstatus, t0\n", + +"4:", + pop_registers!(), + "sret", +), + sym exception_handler_inner +); + +unsafe fn exception_handler_inner(regs: &mut InterruptStack) { + let scause: usize; + let sstatus: usize; + core::arch::asm!( + "csrr t0, scause", + "csrr t1, sstatus", + lateout("t0") scause, + lateout("t1") sstatus, + options(nostack) + ); + + //log::info!("Exception handler incoming: sepc={:x} scause={:x} sstatus={:x}", regs.iret.sepc, scause, sstatus); + + let user_mode = sstatus & (1 << 8) == 0; + + if (scause as isize) < 0 { + handle_interrupt(scause & 0xF); + } else if page_fault(scause, regs, user_mode) { + } else if user_mode { + handle_user_exception(scause, regs); + } else { + handle_system_exception(scause, regs); + } + //log::info!("Exception handler outgoing"); +} + +unsafe fn handle_system_exception(scause: usize, regs: &InterruptStack) { + let stval: usize; + let tp: usize; + core::arch::asm!( + "csrr t0, stval", + "mv t1, tp", + lateout("t0") stval, + lateout("t1") tp, + options(nostack) + ); + + error!( + "S-mode exception! 
scause={:#016x}, stval={:#016x}", + scause, stval + ); + regs.dump(); + + if tp == 0 { + // Early failure - before misc::init and potentially before RMM init + // Do not attempt to trace stack because it would probably trap again + loop {} + } + + stack_trace(); + loop {} +} + +unsafe fn handle_interrupt(interrupt: usize) { + // FIXME retrieve from percpu area + // For now all the interrupts go to boot hart so this suffices... + let hart: usize = BOOT_HART_ID.load(Ordering::Relaxed); + irqchip::hlic::interrupt(hart, interrupt); +} + +unsafe fn handle_user_exception(scause: usize, regs: &mut InterruptStack) { + if scause == USERMODE_ECALL { + let r = &mut regs.registers; + regs.iret.sepc += 4; // skip ecall + let ret = syscall::syscall(r.x17, r.x10, r.x11, r.x12, r.x13, r.x14); + r.x10 = ret; + return; + } + + if scause == BREAKPOINT { + if ptrace::breakpoint_callback(PTRACE_STOP_BREAKPOINT, None).is_some() { + return; + } + } + + let stval: usize; + core::arch::asm!( + "csrr t0, stval", + lateout("t0") stval, + options(nostack) + ); + + info!( + "U-mode exception! scause={:#016x}, stval={:#016x}", + scause, stval + ); + regs.dump(); + + // TODO + /* + let signal = match scause { + 0 | 4 | 6 | 18 | 19 => SIGBUS, // misaligned / machine check + 2 | 8 | 9 => SIGILL, // Illegal instruction / breakpoint / ecall + BREAKPOINT => SIGTRAP, + _ => SIGSEGV, + }; + */ + excp_handler(Exception { kind: scause }); +} + +unsafe fn page_fault(scause: usize, regs: &mut InterruptStack, user_mode: bool) -> bool { + if scause != INSTRUCTION_PAGE_FAULT && scause != LOAD_PAGE_FAULT && scause != STORE_PAGE_FAULT { + return false; + } + + let stval: usize; + core::arch::asm!( + "csrr t0, stval", + lateout("t0") stval, + options(nostack) + ); + + let address = VirtualAddress::new(stval); + let mut generic_flags = GenericPfFlags::empty(); + + generic_flags.set(GenericPfFlags::INVOLVED_WRITE, scause == STORE_PAGE_FAULT); + generic_flags.set(GenericPfFlags::USER_NOT_SUPERVISOR, user_mode); + generic_flags.set( + GenericPfFlags::INSTR_NOT_DATA, + scause == INSTRUCTION_PAGE_FAULT, + ); + // FIXME can these conditions be distinguished? Should they be? + generic_flags.set(GenericPfFlags::INVL, false); + generic_flags.set(GenericPfFlags::PRESENT, false); + + crate::memory::page_fault_handler(regs, generic_flags, address).is_ok() +} diff --git a/src/arch/riscv64/interrupt/handler.rs b/src/arch/riscv64/interrupt/handler.rs new file mode 100644 index 00000000..e7c29143 --- /dev/null +++ b/src/arch/riscv64/interrupt/handler.rs @@ -0,0 +1,316 @@ +use crate::{memory::ArchIntCtx, syscall::IntRegisters}; +use core::mem::size_of; + +#[derive(Default)] +#[repr(C)] +pub struct Registers { + pub x1: usize, // ra + pub x2: usize, // sp + pub x3: usize, // gp + pub x4: usize, // tp + pub x5: usize, // t0 + pub x6: usize, // t1 + pub x7: usize, // t2 + pub x8: usize, // s0/fp + pub x9: usize, // s1 + pub x10: usize, // a0... + pub x11: usize, + pub x12: usize, + pub x13: usize, + pub x14: usize, + pub x15: usize, + pub x16: usize, + pub x17: usize, // a7 + pub x18: usize, // s2... + pub x19: usize, + pub x20: usize, + pub x21: usize, + pub x22: usize, + pub x23: usize, + pub x24: usize, + pub x25: usize, + pub x26: usize, + pub x27: usize, // s11 + pub x28: usize, // t3... 
+ pub x29: usize, + pub x30: usize, + pub x31: usize, // t6 +} + +impl Registers { + pub fn dump(&self) { + println!("X1: {:>016X}", { self.x1 }); + println!("X2: {:>016X}", { self.x2 }); + println!("X3: {:>016X}", { self.x3 }); + println!("X4: {:>016X}", { self.x4 }); + println!("X5: {:>016X}", { self.x5 }); + println!("X6: {:>016X}", { self.x6 }); + println!("X7: {:>016X}", { self.x7 }); + println!("X8: {:>016X}", { self.x8 }); + println!("X9: {:>016X}", { self.x9 }); + println!("X10: {:>016X}", { self.x10 }); + println!("X11: {:>016X}", { self.x11 }); + println!("X12: {:>016X}", { self.x12 }); + println!("X13: {:>016X}", { self.x13 }); + println!("X14: {:>016X}", { self.x14 }); + println!("X15: {:>016X}", { self.x15 }); + println!("X16: {:>016X}", { self.x16 }); + println!("X17: {:>016X}", { self.x17 }); + println!("X18: {:>016X}", { self.x18 }); + println!("X19: {:>016X}", { self.x19 }); + println!("X20: {:>016X}", { self.x20 }); + println!("X21: {:>016X}", { self.x21 }); + println!("X22: {:>016X}", { self.x22 }); + println!("X23: {:>016X}", { self.x23 }); + println!("X24: {:>016X}", { self.x24 }); + println!("X25: {:>016X}", { self.x25 }); + println!("X26: {:>016X}", { self.x26 }); + println!("X27: {:>016X}", { self.x27 }); + println!("X28: {:>016X}", { self.x28 }); + println!("X29: {:>016X}", { self.x29 }); + println!("X30: {:>016X}", { self.x30 }); + println!("X31: {:>016X}", { self.x31 }); + } +} + +#[derive(Default)] +#[repr(C)] +pub struct IretRegisters { + pub sepc: usize, +} + +impl IretRegisters { + pub fn dump(&self) { + println!("SEPC: {:>016X}", { self.sepc }); + } +} + +// NOTE: Layout of this structure must be synced with assembly code in exception.rs +#[derive(Default)] +#[repr(C)] +pub struct InterruptStack { + pub registers: Registers, + pub iret: IretRegisters, +} + +impl InterruptStack { + pub fn init(&mut self) { + const { + assert!(32 * 8 == size_of::()); + } + } + pub fn set_stack_pointer(&mut self, sp: usize) { + self.registers.x2 = sp; + } + pub fn stack_pointer(&self) -> usize { + self.registers.x2 + } + pub fn set_instr_pointer(&mut self, ip: usize) { + self.iret.sepc = ip; + } + pub fn instr_pointer(&self) -> usize { + self.iret.sepc + } + pub fn sig_archdep_reg(&self) -> usize { + self.registers.x5 + } + + pub fn set_syscall_ret_reg(&mut self, ret: usize) { + self.registers.x10 = ret; + } + + pub fn dump(&self) { + self.iret.dump(); + self.registers.dump(); + } + + /// Saves all registers to a struct used by the proc: + /// scheme to read/write registers. 
+ pub fn save(&self, all: &mut IntRegisters) { + all.pc = self.iret.sepc; + all.x31 = self.registers.x31; + all.x30 = self.registers.x30; + all.x29 = self.registers.x29; + all.x28 = self.registers.x28; + all.x27 = self.registers.x27; + all.x26 = self.registers.x26; + all.x25 = self.registers.x25; + all.x24 = self.registers.x24; + all.x23 = self.registers.x23; + all.x22 = self.registers.x22; + all.x21 = self.registers.x21; + all.x20 = self.registers.x20; + all.x19 = self.registers.x19; + all.x18 = self.registers.x18; + all.x17 = self.registers.x17; + all.x16 = self.registers.x16; + all.x15 = self.registers.x15; + all.x14 = self.registers.x14; + all.x13 = self.registers.x13; + all.x12 = self.registers.x12; + all.x11 = self.registers.x11; + all.x10 = self.registers.x10; + all.x9 = self.registers.x9; + all.x8 = self.registers.x8; + all.x7 = self.registers.x7; + all.x6 = self.registers.x6; + all.x5 = self.registers.x5; + all.x2 = self.registers.x2; + all.x1 = self.registers.x1; + } + + /// Loads all registers from a struct used by the proc: + /// scheme to read/write registers. + pub fn load(&mut self, all: &IntRegisters) { + self.iret.sepc = all.pc; + self.registers.x31 = all.x31; + self.registers.x30 = all.x30; + self.registers.x29 = all.x29; + self.registers.x28 = all.x28; + self.registers.x27 = all.x27; + self.registers.x26 = all.x26; + self.registers.x25 = all.x25; + self.registers.x24 = all.x24; + self.registers.x23 = all.x23; + self.registers.x22 = all.x22; + self.registers.x21 = all.x21; + self.registers.x20 = all.x20; + self.registers.x19 = all.x19; + self.registers.x18 = all.x18; + self.registers.x17 = all.x17; + self.registers.x16 = all.x16; + self.registers.x15 = all.x15; + self.registers.x14 = all.x14; + self.registers.x13 = all.x13; + self.registers.x12 = all.x12; + self.registers.x11 = all.x11; + self.registers.x10 = all.x10; + self.registers.x9 = all.x9; + self.registers.x8 = all.x8; + self.registers.x7 = all.x7; + self.registers.x6 = all.x6; + self.registers.x5 = all.x5; + self.registers.x2 = all.x2; + self.registers.x1 = all.x1; + } + + //TODO + pub fn is_singlestep(&self) -> bool { + false + } + pub fn set_singlestep(&mut self, _singlestep: bool) {} +} + +impl ArchIntCtx for InterruptStack { + fn ip(&self) -> usize { + self.iret.sepc + } + fn recover_and_efault(&mut self) { + // Set the return value to nonzero to indicate usercopy failure (EFAULT), and emulate the + // return instruction by setting the return pointer to the saved LR value. + self.iret.sepc = self.registers.x1; // ra + self.registers.x10 = 1; // a0 + } +} + +/// Except for sp and tp +#[macro_export] +macro_rules! push_registers { + () => { + " + addi sp, sp, -32 * 8 + sd x1, (0 * 8)(sp) + // skip sp + sd x3, (2 * 8)(sp) + // skip tp + sd x5, (4 * 8)(sp) + sd x6, (5 * 8)(sp) + sd x7, (6 * 8)(sp) + sd x8, (7 * 8)(sp) + sd x9, (8 * 8)(sp) + sd x10, (9 * 8)(sp) + sd x11, (10 * 8)(sp) + sd x12, (11 * 8)(sp) + sd x13, (12 * 8)(sp) + sd x14, (13 * 8)(sp) + sd x15, (14 * 8)(sp) + sd x16, (15 * 8)(sp) + sd x17, (16 * 8)(sp) + sd x18, (17 * 8)(sp) + sd x19, (18 * 8)(sp) + sd x20, (19 * 8)(sp) + sd x21, (20 * 8)(sp) + sd x22, (21 * 8)(sp) + sd x23, (22 * 8)(sp) + sd x24, (23 * 8)(sp) + sd x25, (24 * 8)(sp) + sd x26, (25 * 8)(sp) + sd x27, (26 * 8)(sp) + sd x28, (27 * 8)(sp) + sd x29, (28 * 8)(sp) + sd x30, (29 * 8)(sp) + sd x31, (30 * 8)(sp) + + csrr t0, sepc + sd t0, (31 * 8)(sp) + " + }; // keep sepc value in t0 on exit +} + +#[macro_export] +macro_rules! 
pop_registers { + () => { + " + ld t0, (31 * 8)(sp) + csrw sepc, t0 + + ld x1, (0 * 8)(sp) + // skip sp, it'll be restored later + ld x3, (2 * 8)(sp) + ld x4, (3 * 8)(sp) + ld x5, (4 * 8)(sp) + ld x6, (5 * 8)(sp) + ld x7, (6 * 8)(sp) + ld x8, (7 * 8)(sp) + ld x9, (8 * 8)(sp) + ld x10, (9 * 8)(sp) + ld x11, (10 * 8)(sp) + ld x12, (11 * 8)(sp) + ld x13, (12 * 8)(sp) + ld x14, (13 * 8)(sp) + ld x15, (14 * 8)(sp) + ld x16, (15 * 8)(sp) + ld x17, (16 * 8)(sp) + ld x18, (17 * 8)(sp) + ld x19, (18 * 8)(sp) + ld x20, (19 * 8)(sp) + ld x21, (20 * 8)(sp) + ld x22, (21 * 8)(sp) + ld x23, (22 * 8)(sp) + ld x24, (23 * 8)(sp) + ld x25, (24 * 8)(sp) + ld x26, (25 * 8)(sp) + ld x27, (26 * 8)(sp) + ld x28, (27 * 8)(sp) + ld x29, (28 * 8)(sp) + ld x30, (29 * 8)(sp) + ld x31, (30 * 8)(sp) + ld sp, (1 * 8)(sp) + " + }; +} + +#[naked] +pub unsafe extern "C" fn enter_usermode() -> ! { + core::arch::naked_asm!(concat!( + "jalr s11\n", + "li t0, 1 << 8\n", // force U mode on sret + "csrc sstatus, t0\n", + "li t0, 0x6000\n", // set FS to dirty (enable FPU in U mode) + "csrs sstatus, t0\n", + "addi t0, sp, 32 * 8\n", // save S mode stack to percpu + "sd t0, 8(tp)\n", + pop_registers!(), + "sret\n", + )) +} diff --git a/src/arch/riscv64/interrupt/mod.rs b/src/arch/riscv64/interrupt/mod.rs new file mode 100644 index 00000000..76144004 --- /dev/null +++ b/src/arch/riscv64/interrupt/mod.rs @@ -0,0 +1,62 @@ +use core::arch::asm; + +#[macro_use] +mod handler; + +mod exception; +pub mod syscall; +pub mod trace; + +pub use handler::InterruptStack; + +/// Clear interrupts +#[inline(always)] +pub unsafe fn disable() { + asm!("csrci sstatus, 1 << 1") +} + +/// Set interrupts +#[inline(always)] +pub unsafe fn enable() { + asm!("csrsi sstatus, 1 << 1") +} + +/// Set interrupts and halt +/// This will atomically wait for the next interrupt +/// Performing enable followed by halt is not guaranteed to be atomic, use this instead! +#[inline(always)] +pub unsafe fn enable_and_halt() { + asm!("csrsi sstatus, 1 << 1", "wfi") +} + +/// Set interrupts and nop +/// This will enable interrupts and allow the IF flag to be processed +/// Simply enabling interrupts does not gurantee that they will trigger, use this instead! 
+#[inline(always)] +pub unsafe fn enable_and_nop() { + asm!("csrsi sstatus, 1 << 1", "nop") +} + +/// Halt instruction +#[inline(always)] +pub unsafe fn halt() { + asm!("wfi", options(nomem, nostack)) +} + +/// Pause instruction +/// Safe because it is similar to a NOP, and has no memory effects +#[inline(always)] +pub fn pause() { + unsafe { + // It's a hint instruction, safe to execute without Zihintpause extension + asm!("pause", options(nomem, nostack)); + } +} + +pub unsafe fn init() { + // Setup interrupt handlers + asm!( + "la t0, exception_handler", // WARL=0 - direct mode combined handler + "csrw stvec, t0" + ); +} diff --git a/src/arch/riscv64/interrupt/syscall.rs b/src/arch/riscv64/interrupt/syscall.rs new file mode 100644 index 00000000..b7c67514 --- /dev/null +++ b/src/arch/riscv64/interrupt/syscall.rs @@ -0,0 +1 @@ +pub use super::handler::enter_usermode; diff --git a/src/arch/riscv64/interrupt/trace.rs b/src/arch/riscv64/interrupt/trace.rs new file mode 100644 index 00000000..88d754a2 --- /dev/null +++ b/src/arch/riscv64/interrupt/trace.rs @@ -0,0 +1,31 @@ +use core::{arch::asm, mem}; + +pub struct StackTrace { + pub fp: usize, + pub pc_ptr: *const usize, +} + +impl StackTrace { + #[inline(always)] + pub unsafe fn start() -> Option { + let fp: usize; + asm!("mv {}, fp", out(reg) fp); + + let pc_ptr = fp.checked_sub(mem::size_of::())?; + let fp = pc_ptr.checked_sub(mem::size_of::())?; + Some(StackTrace { + fp, + pc_ptr: pc_ptr as *const usize, + }) + } + + pub unsafe fn next(self) -> Option { + let fp = *(self.fp as *const usize); + let pc_ptr = fp.checked_sub(mem::size_of::())?; + let fp = pc_ptr.checked_sub(mem::size_of::())?; + Some(StackTrace { + fp: fp, + pc_ptr: pc_ptr as *const usize, + }) + } +} diff --git a/src/arch/x86_64/ipi.rs b/src/arch/riscv64/ipi.rs similarity index 56% rename from src/arch/x86_64/ipi.rs rename to src/arch/riscv64/ipi.rs index 97996dcd..cc4df342 100644 --- a/src/arch/x86_64/ipi.rs +++ b/src/arch/riscv64/ipi.rs @@ -21,9 +21,12 @@ pub fn ipi(_kind: IpiKind, _target: IpiTarget) {} #[cfg(feature = "multi_core")] #[inline(always)] -pub fn ipi(kind: IpiKind, target: IpiTarget) { - use device::local_apic::LOCAL_APIC; +pub fn ipi(_kind: IpiKind, _target: IpiTarget) {} - let icr = (target as u64) << 18 | 1 << 14 | (kind as u64); - unsafe { LOCAL_APIC.set_icr(icr) }; -} +#[cfg(not(feature = "multi_core"))] +#[inline(always)] +pub fn ipi_single(_kind: IpiKind, _target: crate::cpu_set::LogicalCpuId) {} + +#[cfg(feature = "multi_core")] +#[inline(always)] +pub fn ipi_single(_kind: IpiKind, _target: crate::cpu_set::LogicalCpuId) {} diff --git a/src/arch/riscv64/macros.rs b/src/arch/riscv64/macros.rs new file mode 100644 index 00000000..4e3566fc --- /dev/null +++ b/src/arch/riscv64/macros.rs @@ -0,0 +1,16 @@ +/// Print to console +#[macro_export] +macro_rules! print { + ($($arg:tt)*) => ({ + use core::fmt::Write; + let _ = write!($crate::arch::debug::Writer::new(), $($arg)*); + }); +} + +/// Print with new line to console +#[macro_export] +macro_rules! 
println { + () => (print!("\n")); + ($fmt:expr) => (print!(concat!($fmt, "\n"))); + ($fmt:expr, $($arg:tt)*) => (print!(concat!($fmt, "\n"), $($arg)*)); +} diff --git a/src/arch/riscv64/misc.rs b/src/arch/riscv64/misc.rs new file mode 100644 index 00000000..60bde118 --- /dev/null +++ b/src/arch/riscv64/misc.rs @@ -0,0 +1,45 @@ +use core::arch::asm; + +use crate::{ + cpu_set::LogicalCpuId, + paging::{RmmA, RmmArch}, + percpu::PercpuBlock, +}; + +#[repr(C)] +pub struct ArchPercpu { + // These fields must be kept first and in this order. Assembly in exception.rs depends on it + pub tmp: usize, + pub s_sp: usize, + + pub percpu: PercpuBlock, +} + +impl PercpuBlock { + pub fn current() -> &'static Self { + unsafe { + let tp: *const ArchPercpu; + asm!( "mv t0, tp", out("t0") tp ); + let arch_percpu = &*tp; + &arch_percpu.percpu + } + } +} + +#[cold] +pub unsafe fn init(cpu_id: LogicalCpuId) { + let frame = crate::memory::allocate_frame().expect("failed to allocate percpu memory"); + let virt = RmmA::phys_to_virt(frame.base()).data() as *mut ArchPercpu; + + virt.write(ArchPercpu { + tmp: 0, + s_sp: 0, + percpu: PercpuBlock::init(cpu_id), + }); + + asm!( + "mv tp, {}", + "csrw sscratch, tp", + in(reg) virt as usize + ); +} diff --git a/src/arch/riscv64/mod.rs b/src/arch/riscv64/mod.rs new file mode 100644 index 00000000..62d1afbd --- /dev/null +++ b/src/arch/riscv64/mod.rs @@ -0,0 +1,70 @@ +#[macro_use] +pub mod macros; + +pub mod consts; +pub mod debug; +pub mod device; +pub mod interrupt; +pub mod ipi; +pub mod misc; +pub mod paging; +pub mod rmm; +pub mod start; +pub mod stop; +pub mod time; + +pub use ::rmm::RiscV64Sv48Arch as CurrentRmmArch; +use core::arch::naked_asm; + +pub use arch_copy_to_user as arch_copy_from_user; + +#[link_section = ".usercopy-fns"] +#[naked] +pub unsafe extern "C" fn arch_copy_to_user(dst: usize, src: usize, len: usize) -> u8 { + naked_asm!( + " + addi sp, sp, -16 + sd fp, 0(sp) + sd ra, 8(sp) + addi fp, sp, 16 + li t1, 1 << 18 // SUM + csrs sstatus, t1 + jal 2f + csrc sstatus, t1 + ld ra, -8(fp) + ld fp, -16(fp) + addi sp, sp, 16 + ret + + 2: or t0, a0, a1 + andi t0, t0, 7 + bne t0, x0, 4f + srli t2, a2, 3 + andi a2, a2, 7 + beq t2, x0, 4f + 3: ld t0, 0(a1) + sd t0, 0(a0) + addi a0, a0, 8 + addi a1, a1, 8 + addi t2, t2, -1 + bne t2, x0, 3b + + 4: beq a2, x0, 5f + lb t0, 0(a1) + sb t0, 0(a0) + addi a0, a0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + bne a2, x0, 4b + 5: mv a0, x0 + ret + " + ) +} + +pub const KFX_SIZE: usize = 1024; + +// This function exists as the KFX size is dynamic on x86_64. +pub fn kfx_size() -> usize { + KFX_SIZE +} diff --git a/src/arch/riscv64/paging/entry.rs b/src/arch/riscv64/paging/entry.rs new file mode 100644 index 00000000..affe00f6 --- /dev/null +++ b/src/arch/riscv64/paging/entry.rs @@ -0,0 +1,11 @@ +/// A page table entry +#[repr(packed(8))] +pub struct Entry(u64); + +bitflags! 
{ + pub struct EntryFlags: usize { + const NO_CACHE = 1 << 4; + const DEV_MEM = 0; + const WRITE_COMBINING = 0; + } +} diff --git a/src/arch/riscv64/paging/mapper.rs b/src/arch/riscv64/paging/mapper.rs new file mode 100644 index 00000000..1543c6ff --- /dev/null +++ b/src/arch/riscv64/paging/mapper.rs @@ -0,0 +1,29 @@ +use crate::ipi::{ipi, IpiKind, IpiTarget}; + +use super::RmmA; + +pub use rmm::{Flusher, PageFlush, PageFlushAll}; + +pub struct InactiveFlusher { + _inner: (), +} +impl InactiveFlusher { + // TODO: cpu id + pub fn new() -> Self { + Self { _inner: () } + } +} + +impl Flusher for InactiveFlusher { + fn consume(&mut self, flush: PageFlush) { + // TODO: Push to TLB "mailbox" or tell it to reload CR3 if there are too many entries. + unsafe { + flush.ignore(); + } + } +} +impl Drop for InactiveFlusher { + fn drop(&mut self) { + ipi(IpiKind::Tlb, IpiTarget::Other); + } +} diff --git a/src/arch/riscv64/paging/mod.rs b/src/arch/riscv64/paging/mod.rs new file mode 100644 index 00000000..1fa656f2 --- /dev/null +++ b/src/arch/riscv64/paging/mod.rs @@ -0,0 +1,89 @@ +#![allow(unused)] + +pub use super::CurrentRmmArch as RmmA; +pub use rmm::{Arch as RmmArch, PageFlags, PhysicalAddress, TableKind, VirtualAddress}; + +pub type PageMapper = rmm::PageMapper; +pub use crate::rmm::KernelMapper; + +pub mod entry; +pub mod mapper; + +/// Number of entries per page table +pub const ENTRY_COUNT: usize = RmmA::PAGE_ENTRIES; + +/// Size of pages +pub const PAGE_SIZE: usize = RmmA::PAGE_SIZE; +pub const PAGE_MASK: usize = RmmA::PAGE_OFFSET_MASK; + +#[cold] +pub unsafe fn init() { + // Assuming SBI already set up PMAs correctly for us + // TODO: detect Svpbmt present/enabled and override device memory with PBMT=IO +} + +/// Page +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Page { + number: usize, +} + +impl Page { + pub fn start_address(self) -> VirtualAddress { + VirtualAddress::new(self.number * PAGE_SIZE) + } + + pub fn containing_address(address: VirtualAddress) -> Page { + Page { + number: address.data() / PAGE_SIZE, + } + } + + pub fn range_inclusive(start: Page, r#final: Page) -> PageIter { + PageIter { + start, + end: r#final.next(), + } + } + + pub fn next(self) -> Page { + self.next_by(1) + } + pub fn next_by(self, n: usize) -> Page { + Self { + number: self.number + n, + } + } + + pub fn offset_from(self, other: Self) -> usize { + self.number - other.number + } +} + +pub struct PageIter { + start: Page, + end: Page, +} + +impl Iterator for PageIter { + type Item = Page; + + fn next(&mut self) -> Option { + if self.start < self.end { + let page = self.start; + self.start = self.start.next(); + Some(page) + } else { + None + } + } +} + +/// Round down to the nearest multiple of page size +pub fn round_down_pages(number: usize) -> usize { + number - number % PAGE_SIZE +} +/// Round up to the nearest multiple of page size +pub fn round_up_pages(number: usize) -> usize { + round_down_pages(number + PAGE_SIZE - 1) +} diff --git a/src/arch/riscv64/rmm.rs b/src/arch/riscv64/rmm.rs new file mode 100644 index 00000000..a4be789a --- /dev/null +++ b/src/arch/riscv64/rmm.rs @@ -0,0 +1,30 @@ +use rmm::{Arch, PageFlags, VirtualAddress}; + +pub struct KernelMapper { + mapper: crate::paging::PageMapper, + ro: bool, +} +impl KernelMapper { + pub fn lock() -> Self { + unimplemented!() + } + pub fn get_mut(&mut self) -> Option<&mut crate::paging::PageMapper> { + unimplemented!() + } +} + +pub unsafe fn page_flags(virt: VirtualAddress) -> PageFlags { + use 
crate::kernel_executable_offsets::*; + let virt_addr = virt.data(); + + if virt_addr >= __text_start() && virt_addr < __text_end() { + // Remap text read-only, execute + PageFlags::new().execute(true) + } else if virt_addr >= __rodata_start() && virt_addr < __rodata_end() { + // Remap rodata read-only, no execute + PageFlags::new() + } else { + // Remap everything else read-write, no execute + PageFlags::new().write(true) + } +} diff --git a/src/arch/riscv64/start.rs b/src/arch/riscv64/start.rs new file mode 100644 index 00000000..e29ac32c --- /dev/null +++ b/src/arch/riscv64/start.rs @@ -0,0 +1,232 @@ +use core::{ + arch::asm, + slice, + sync::atomic::{AtomicU32, AtomicUsize, Ordering}, +}; +use fdt::Fdt; +use log::info; + +use crate::{ + allocator, + memory::Frame, + paging::{PhysicalAddress, PAGE_SIZE}, +}; + +use crate::{ + arch::{device::serial::init_early, interrupt, paging}, + device, + startup::memory::{register_bootloader_areas, register_memory_region, BootloaderMemoryKind}, +}; + +#[cfg(feature = "graphical_debug")] +use crate::devices::graphical_debug; +use crate::dtb::register_dev_memory_ranges; + +/// Test of zero values in BSS. +static mut BSS_TEST_ZERO: usize = 0; +/// Test of non-zero values in data. +static mut DATA_TEST_NONZERO: usize = 0xFFFF_FFFF_FFFF_FFFF; + +pub static KERNEL_BASE: AtomicUsize = AtomicUsize::new(0); +pub static KERNEL_SIZE: AtomicUsize = AtomicUsize::new(0); +pub static CPU_COUNT: AtomicU32 = AtomicU32::new(0); +pub static BOOT_HART_ID: AtomicUsize = AtomicUsize::new(0); + +#[repr(packed)] +pub struct KernelArgs { + kernel_base: usize, + kernel_size: usize, + stack_base: usize, + stack_size: usize, + env_base: usize, + env_size: usize, + acpi_base: usize, + acpi_size: usize, + areas_base: usize, + areas_size: usize, + + /// The physical base 64-bit pointer to the contiguous bootstrap/initfs. + bootstrap_base: usize, + /// Size of contiguous bootstrap/initfs physical region, not necessarily page aligned. + bootstrap_size: usize, +} + +fn get_boot_hart_id(env: &[u8]) -> Option { + for line in core::str::from_utf8(env).unwrap_or("").lines() { + let mut parts = line.splitn(2, '='); + let name = parts.next().unwrap_or(""); + let value = parts.next().unwrap_or(""); + + if name == "BOOT_HART_ID" { + return usize::from_str_radix(value, 16).ok(); + } + } + None +} + +/// The entry to Rust, all things must be initialized +#[no_mangle] +pub unsafe extern "C" fn kstart(args_ptr: *const KernelArgs) -> ! 
{ + asm!( + "mv tp, x0", // reset percpu until it is initialized + "csrw sscratch, tp", + "sd x0, -16(fp)", // and stop frame walker here + "sd x0, -8(fp)", + ); + + let bootstrap = { + let args = &*args_ptr; + + // BSS should already be zero + { + assert_eq!(BSS_TEST_ZERO, 0); + assert_eq!(DATA_TEST_NONZERO, 0xFFFF_FFFF_FFFF_FFFF); + } + + KERNEL_BASE.store(args.kernel_base, Ordering::SeqCst); + KERNEL_SIZE.store(args.kernel_size, Ordering::SeqCst); + + let env = slice::from_raw_parts( + (crate::PHYS_OFFSET + args.env_base) as *const u8, + args.env_size, + ); + + let dtb_data = if args.acpi_base != 0 { + Some((crate::PHYS_OFFSET + args.acpi_base, args.acpi_size)) + } else { + None + }; + let dtb = dtb_data + .map(|(base, size)| unsafe { slice::from_raw_parts(base as *const u8, size) }) + .and_then(|data| Fdt::new(data).ok()); + + #[cfg(feature = "graphical_debug")] + graphical_debug::init(env); + + #[cfg(feature = "serial_debug")] + if let Some(dtb) = &dtb { + init_early(dtb); + } + + // Initialize logger + crate::log::init_logger(|r| { + use core::fmt::Write; + let _ = write!( + crate::debug::Writer::new(), + "{}:{} -- {}\n", + r.target(), + r.level(), + r.args() + ); + }); + ::log::set_max_level(::log::LevelFilter::Debug); + + info!("Redox OS starting..."); + info!( + "Kernel: {:X}:{:X}", + { args.kernel_base }, + args.kernel_base + args.kernel_size + ); + info!( + "Stack: {:X}:{:X}", + { args.stack_base }, + args.stack_base + args.stack_size + ); + info!( + "Env: {:X}:{:X}", + { args.env_base }, + args.env_base + args.env_size + ); + info!( + "RSDPs: {:X}:{:X}", + { args.acpi_size }, + args.acpi_size + args.acpi_size + ); + info!( + "Areas: {:X}:{:X}", + { args.areas_base }, + args.areas_base + args.areas_size + ); + info!( + "Bootstrap: {:X}:{:X}", + { args.bootstrap_base }, + args.bootstrap_base + args.bootstrap_size + ); + + if let Some(dtb) = &dtb { + device::dump_fdt(&dtb); + } + + interrupt::init(); + + let bootstrap = crate::Bootstrap { + base: Frame::containing(PhysicalAddress::new(args.bootstrap_base)), + page_count: args.bootstrap_size / PAGE_SIZE, + env, + }; + + // Initialize RMM + register_bootloader_areas(args.areas_base, args.areas_size); + if let Some(dt) = &dtb { + register_dev_memory_ranges(dt); + } + + register_memory_region( + args.kernel_base, + args.kernel_size, + BootloaderMemoryKind::Kernel, + ); + register_memory_region( + args.stack_base, + args.stack_size, + BootloaderMemoryKind::IdentityMap, + ); + register_memory_region( + args.env_base, + args.env_size, + BootloaderMemoryKind::IdentityMap, + ); + register_memory_region( + args.acpi_base, + args.acpi_size, + BootloaderMemoryKind::IdentityMap, + ); + register_memory_region( + args.bootstrap_base, + args.bootstrap_size, + BootloaderMemoryKind::IdentityMap, + ); + + crate::startup::memory::init(None, None); + + let boot_hart_id = get_boot_hart_id(env).expect("Didn't get boot HART id from bootloader"); + info!("Booting on HART {}", boot_hart_id); + BOOT_HART_ID.store(boot_hart_id, Ordering::Relaxed); + + paging::init(); + + crate::misc::init(crate::cpu_set::LogicalCpuId::new(0)); + + CPU_COUNT.store(1, Ordering::SeqCst); + + // Setup kernel heap + allocator::init(); + + // Activate memory logging + crate::log::init(); + + crate::dtb::init(dtb_data); + + // Initialize devices + device::init(); + + // Initialize all of the non-core devices not otherwise needed to complete initialization + device::init_noncore(); + + // FIXME bringup AP HARTs + + bootstrap + }; + + crate::kmain(CPU_COUNT.load(Ordering::SeqCst), 
bootstrap); +} diff --git a/src/arch/riscv64/stop.rs b/src/arch/riscv64/stop.rs new file mode 100644 index 00000000..b0bd5793 --- /dev/null +++ b/src/arch/riscv64/stop.rs @@ -0,0 +1,13 @@ +pub unsafe fn kreset() -> ! { + println!("kreset"); + unimplemented!() +} + +pub unsafe fn emergency_reset() -> ! { + unimplemented!() +} + +pub unsafe fn kstop() -> ! { + println!("kstop"); + unimplemented!() +} diff --git a/src/arch/riscv64/time.rs b/src/arch/riscv64/time.rs new file mode 100644 index 00000000..82c8e789 --- /dev/null +++ b/src/arch/riscv64/time.rs @@ -0,0 +1,26 @@ +use core::{ + arch::asm, + sync::atomic::{AtomicUsize, Ordering}, +}; + +static MTIME_FREQ_HZ: AtomicUsize = AtomicUsize::new(0); + +pub fn init(freq_hz: usize) { + MTIME_FREQ_HZ.store(freq_hz, Ordering::Relaxed); +} + +pub fn monotonic_absolute() -> u128 { + let freq_hz = MTIME_FREQ_HZ.load(Ordering::Relaxed); + if freq_hz > 0 { + let counter: usize; + unsafe { + asm!( + "rdtime t0", + lateout("t0") counter + ); + }; + counter as u128 * 1_000_000_000u128 / freq_hz as u128 + } else { + 0 + } +} diff --git a/src/arch/x86/consts.rs b/src/arch/x86/consts.rs new file mode 100644 index 00000000..73946ecd --- /dev/null +++ b/src/arch/x86/consts.rs @@ -0,0 +1,33 @@ +#![allow(unused)] +// Because the memory map is so important to not be aliased, it is defined here, in one place +// The lower 256 PML4 entries are reserved for userspace +// Each PML4 entry references up to 512 GB of memory +// The second from the top (510) PML4 is reserved for the kernel + +/// Offset of kernel (256 MiB max) +pub const KERNEL_OFFSET: usize = 0xC000_0000; + +// Framebuffer mapped by bootloader to 0xD000_0000 (128 MiB max) + +// Offset to APIC mappings (optional) +pub const LAPIC_OFFSET: usize = 0xD800_0000; +pub const IOAPIC_OFFSET: usize = LAPIC_OFFSET + 4096; +pub const HPET_OFFSET: usize = IOAPIC_OFFSET + 4096; + +/// Offset to kernel heap (256 MiB max) +pub const KERNEL_HEAP_OFFSET: usize = 0xE000_0000; +/// Size of kernel heap +pub const KERNEL_HEAP_SIZE: usize = rmm::MEGABYTE; + +/// Offset to kernel percpu variables (256 MiB max) +pub const KERNEL_PERCPU_OFFSET: usize = 0xF000_0000; +/// Size of kernel percpu variables +pub const KERNEL_PERCPU_SHIFT: u8 = 16; // 2^16 = 64 KiB +pub const KERNEL_PERCPU_SIZE: usize = 1_usize << KERNEL_PERCPU_SHIFT; + +/// Offset of physmap (1 GiB max) +// This needs to match RMM's PHYS_OFFSET +pub const PHYS_OFFSET: usize = 0x8000_0000; + +/// End offset of the user image, i.e. kernel start +pub const USER_END_OFFSET: usize = 0x8000_0000; diff --git a/src/arch/x86/gdt.rs b/src/arch/x86/gdt.rs new file mode 100644 index 00000000..8aa9722d --- /dev/null +++ b/src/arch/x86/gdt.rs @@ -0,0 +1,277 @@ +//! 
Global descriptor table + +use core::{mem, ptr::addr_of_mut}; + +use crate::cpu_set::LogicalCpuId; + +use x86::{ + bits32::task::TaskStateSegment, + dtables::{self, DescriptorTablePointer}, + segmentation::{self, Descriptor as SegmentDescriptor, SegmentSelector}, + task, Ring, +}; + +use crate::paging::{RmmA, RmmArch, PAGE_SIZE}; + +pub const GDT_NULL: usize = 0; +pub const GDT_KERNEL_CODE: usize = 1; +pub const GDT_KERNEL_DATA: usize = 2; +pub const GDT_KERNEL_PERCPU: usize = 3; +pub const GDT_USER_CODE: usize = 4; +pub const GDT_USER_DATA: usize = 5; +pub const GDT_USER_FS: usize = 6; +pub const GDT_USER_GS: usize = 7; +pub const GDT_TSS: usize = 8; + +pub const GDT_A_PRESENT: u8 = 1 << 7; +pub const GDT_A_RING_0: u8 = 0 << 5; +pub const GDT_A_RING_1: u8 = 1 << 5; +pub const GDT_A_RING_2: u8 = 2 << 5; +pub const GDT_A_RING_3: u8 = 3 << 5; +pub const GDT_A_SYSTEM: u8 = 1 << 4; +pub const GDT_A_EXECUTABLE: u8 = 1 << 3; +pub const GDT_A_CONFORMING: u8 = 1 << 2; +pub const GDT_A_PRIVILEGE: u8 = 1 << 1; +pub const GDT_A_DIRTY: u8 = 1; + +pub const GDT_A_TSS_AVAIL: u8 = 0x9; +pub const GDT_A_TSS_BUSY: u8 = 0xB; + +pub const GDT_F_PAGE_SIZE: u8 = 1 << 7; +pub const GDT_F_PROTECTED_MODE: u8 = 1 << 6; +pub const GDT_F_LONG_MODE: u8 = 1 << 5; + +static INIT_GDT: [GdtEntry; 3] = [ + // Null + GdtEntry::new(0, 0, 0, 0), + // Kernel code + GdtEntry::new( + 0, + 0xFFFFF, + GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_EXECUTABLE | GDT_A_PRIVILEGE, + GDT_F_PAGE_SIZE | GDT_F_PROTECTED_MODE, + ), + // Kernel data + GdtEntry::new( + 0, + 0xFFFFF, + GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, + GDT_F_PAGE_SIZE | GDT_F_PROTECTED_MODE, + ), +]; + +const BASE_GDT: [GdtEntry; 9] = [ + // Null + GdtEntry::new(0, 0, 0, 0), + // Kernel code + GdtEntry::new( + 0, + 0xFFFFF, + GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_EXECUTABLE | GDT_A_PRIVILEGE, + GDT_F_PAGE_SIZE | GDT_F_PROTECTED_MODE, + ), + // Kernel data + GdtEntry::new( + 0, + 0xFFFFF, + GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, + GDT_F_PAGE_SIZE | GDT_F_PROTECTED_MODE, + ), + // Kernel TLS + GdtEntry::new( + 0, + 0xFFFFF, + GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, + GDT_F_PAGE_SIZE | GDT_F_PROTECTED_MODE, + ), + // User (32-bit) code + GdtEntry::new( + 0, + 0xFFFFF, + GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_EXECUTABLE | GDT_A_PRIVILEGE, + GDT_F_PAGE_SIZE | GDT_F_PROTECTED_MODE, + ), + // User data + GdtEntry::new( + 0, + 0xFFFFF, + GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, + GDT_F_PAGE_SIZE | GDT_F_PROTECTED_MODE, + ), + // User FS (for TLS) + GdtEntry::new( + 0, + 0xFFFFF, + GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, + GDT_F_PAGE_SIZE | GDT_F_PROTECTED_MODE, + ), + // User GS (for TLS) + GdtEntry::new( + 0, + 0xFFFFF, + GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, + GDT_F_PAGE_SIZE | GDT_F_PROTECTED_MODE, + ), + // TSS + GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_TSS_AVAIL, 0), +]; + +const IOBITMAP_SIZE: usize = 8192; + +#[repr(C, align(4096))] +pub struct ProcessorControlRegion { + pub self_ref: usize, + pub user_rsp_tmp: usize, + pub gdt: [GdtEntry; 9], + percpu: crate::percpu::PercpuBlock, + pub tss: TssWrapper, + pub _pio_bitmap: [u8; IOBITMAP_SIZE], + pub _all_ones: u8, +} + +// NOTE: Despite not using #[repr(C, packed)], we do know that while there may be some padding +// inserted before and after the TSS, the main TSS structure will remain intact. 
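+// The TSS descriptor set up in init_paging() spans both the TSS and the I/O permission
+// bitmap that follows it in ProcessorControlRegion, and _all_ones supplies the trailing
+// 0xFF byte that x86 expects after the end of the I/O permission bitmap.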
+#[repr(C, align(16))] +pub struct TssWrapper(pub TaskStateSegment); + +pub unsafe fn pcr() -> *mut ProcessorControlRegion { + let mut ret: *mut ProcessorControlRegion; + core::arch::asm!("mov {}, gs:[{}]", out(reg) ret, const(core::mem::offset_of!(ProcessorControlRegion, self_ref))); + ret +} + +#[cfg(feature = "pti")] +pub unsafe fn set_tss_stack(stack: usize) { + use super::pti::{PTI_CONTEXT_STACK, PTI_CPU_STACK}; + addr_of_mut!((*pcr()).tss.0.ss0).write((GDT_KERNEL_DATA << 3) as u16); + addr_of_mut!((*pcr()).tss.0.esp0) + .write((PTI_CPU_STACK.as_ptr() as usize + PTI_CPU_STACK.len()) as u32); + PTI_CONTEXT_STACK = stack; +} + +#[cfg(not(feature = "pti"))] +pub unsafe fn set_tss_stack(stack: usize) { + addr_of_mut!((*pcr()).tss.0.ss0).write((GDT_KERNEL_DATA << 3) as u16); + addr_of_mut!((*pcr()).tss.0.esp0).write(stack as u32); +} +pub unsafe fn set_userspace_io_allowed(allowed: bool) { + addr_of_mut!((*pcr()).tss.0.iobp_offset).write(if allowed { + mem::size_of::() as u16 + } else { + 0xFFFF + }); +} + +/// Initialize a minimal GDT without configuring percpu. +pub unsafe fn init() { + // Load the initial GDT, before the kernel remaps itself. + dtables::lgdt(&DescriptorTablePointer { + limit: (INIT_GDT.len() * mem::size_of::() - 1) as u16, + base: INIT_GDT.as_ptr() as *const SegmentDescriptor, + }); + + // Load the segment descriptors + segmentation::load_cs(SegmentSelector::new(GDT_KERNEL_CODE as u16, Ring::Ring0)); + segmentation::load_ds(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); + segmentation::load_es(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); + segmentation::load_fs(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); + segmentation::load_gs(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); + segmentation::load_ss(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); +} + +/// Initialize GDT and configure percpu. +pub unsafe fn init_paging(stack_offset: usize, cpu_id: LogicalCpuId) { + let alloc_order = mem::size_of::() + .div_ceil(PAGE_SIZE) + .next_power_of_two() + .trailing_zeros(); + let pcr_frame = + crate::memory::allocate_p2frame(alloc_order).expect("failed to allocate PCR frame"); + let pcr = &mut *(RmmA::phys_to_virt(pcr_frame.base()).data() as *mut ProcessorControlRegion); + + pcr.self_ref = pcr as *const _ as usize; + pcr.gdt = BASE_GDT; + pcr.gdt[GDT_KERNEL_PERCPU].set_offset(pcr as *const _ as u32); + + let gdtr: DescriptorTablePointer = DescriptorTablePointer { + limit: (pcr.gdt.len() * mem::size_of::() - 1) as u16, + base: pcr.gdt.as_ptr() as *const SegmentDescriptor, + }; + + { + pcr._all_ones = 0xFF; + pcr.tss.0.iobp_offset = 0xFFFF; + let tss = &pcr.tss.0 as *const _ as usize as u32; + + pcr.gdt[GDT_TSS].set_offset(tss); + pcr.gdt[GDT_TSS] + .set_limit(mem::size_of::() as u32 + IOBITMAP_SIZE as u32); + } + + // Load the new GDT, which is correctly located in thread local storage. + dtables::lgdt(&gdtr); + + // Reload the segment descriptors + segmentation::load_cs(SegmentSelector::new(GDT_KERNEL_CODE as u16, Ring::Ring0)); + segmentation::load_ds(SegmentSelector::new(GDT_USER_DATA as u16, Ring::Ring3)); + segmentation::load_es(SegmentSelector::new(GDT_USER_DATA as u16, Ring::Ring3)); + segmentation::load_ss(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); + + // TODO: Use FS for kernel percpu on i686? 
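+    // gs must reference the per-CPU block so pcr() can read gs:[self_ref];
+    // fs is left pointing at the user FS segment used for userspace TLS.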
+ segmentation::load_fs(SegmentSelector::new(GDT_USER_FS as u16, Ring::Ring0)); + segmentation::load_gs(SegmentSelector::new(GDT_KERNEL_PERCPU as u16, Ring::Ring0)); + + // Set the stack pointer to use when coming back from userspace. + set_tss_stack(stack_offset); + + // Load the task register + task::load_tr(SegmentSelector::new(GDT_TSS as u16, Ring::Ring0)); + + pcr.percpu = crate::percpu::PercpuBlock::init(cpu_id); + crate::percpu::init_tlb_shootdown(cpu_id, &mut pcr.percpu); +} + +#[derive(Copy, Clone, Debug)] +#[repr(C, packed)] +pub struct GdtEntry { + pub limitl: u16, + pub offsetl: u16, + pub offsetm: u8, + pub access: u8, + pub flags_limith: u8, + pub offseth: u8, +} + +impl GdtEntry { + pub const fn new(offset: u32, limit: u32, access: u8, flags: u8) -> Self { + GdtEntry { + limitl: limit as u16, + offsetl: offset as u16, + offsetm: (offset >> 16) as u8, + access, + flags_limith: flags & 0xF0 | ((limit >> 16) as u8) & 0x0F, + offseth: (offset >> 24) as u8, + } + } + + pub fn offset(&self) -> u32 { + (self.offsetl as u32) | ((self.offsetm as u32) << 16) | ((self.offseth as u32) << 24) + } + + pub fn set_offset(&mut self, offset: u32) { + self.offsetl = offset as u16; + self.offsetm = (offset >> 16) as u8; + self.offseth = (offset >> 24) as u8; + } + + pub fn set_limit(&mut self, limit: u32) { + self.limitl = limit as u16; + self.flags_limith = self.flags_limith & 0xF0 | ((limit >> 16) as u8) & 0x0F; + } +} + +impl crate::percpu::PercpuBlock { + pub fn current() -> &'static Self { + unsafe { &*core::ptr::addr_of!((*pcr()).percpu) } + } +} diff --git a/src/arch/x86/interrupt/exception.rs b/src/arch/x86/interrupt/exception.rs new file mode 100644 index 00000000..f5f06e81 --- /dev/null +++ b/src/arch/x86/interrupt/exception.rs @@ -0,0 +1,265 @@ +use syscall::Exception; +use x86::irq::PageFaultError; + +use crate::{ + context::signal::excp_handler, interrupt, interrupt_error, interrupt_stack, + memory::GenericPfFlags, paging::VirtualAddress, panic::stack_trace, ptrace, syscall::flag::*, +}; + +interrupt_stack!(divide_by_zero, |stack| { + println!("Divide by zero"); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 0, + ..Default::default() + }); +}); + +interrupt_stack!(debug, @paranoid, |stack| { + let mut handled = false; + + // Disable singlestep before there is a breakpoint, since the breakpoint + // handler might end up setting it again but unless it does we want the + // default to be false. + let had_singlestep = stack.iret.eflags & (1 << 8) == 1 << 8; + stack.set_singlestep(false); + + if ptrace::breakpoint_callback(PTRACE_STOP_SINGLESTEP, None).is_some() { + handled = true; + } else { + // There was no breakpoint, restore original value + stack.set_singlestep(had_singlestep); + } + + if !handled { + println!("Debug trap"); + stack.dump(); + excp_handler(Exception { + kind: 1, + ..Default::default() + }); + } +}); + +interrupt_stack!(non_maskable, @paranoid, |stack| { + println!("Non-maskable interrupt"); + stack.dump(); +}); + +interrupt_stack!(breakpoint, |stack| { + // The processor lets EIP point to the instruction *after* int3, so + // unhandled breakpoint interrupt don't go in an infinite loop. But we + // throw SIGTRAP anyway, so that's not a problem. 
+ // + // We have the following code to prevent + // - EIP from going out of sync with instructions + // - The user having to do 2 syscalls to replace the instruction at EIP + // - Having more compatibility glue for GDB than necessary + // + // Let's just follow Linux convention and let EIP be EIP-1, point to the + // int3 instruction. After all, it's the sanest thing to do. + stack.iret.eip -= 1; + + if ptrace::breakpoint_callback(PTRACE_STOP_BREAKPOINT, None).is_none() { + println!("Breakpoint trap"); + stack.dump(); + excp_handler(Exception { + kind: 3, + ..Default::default() + }); + } +}); + +interrupt_stack!(overflow, |stack| { + println!("Overflow trap"); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 4, + ..Default::default() + }); +}); + +interrupt_stack!(bound_range, |stack| { + println!("Bound range exceeded fault"); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 5, + ..Default::default() + }); +}); + +interrupt_stack!(invalid_opcode, |stack| { + println!("Invalid opcode fault"); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 6, + ..Default::default() + }); +}); + +interrupt_stack!(device_not_available, |stack| { + println!("Device not available fault"); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 7, + ..Default::default() + }); +}); + +interrupt_error!(double_fault, |stack| { + println!("Double fault"); + stack.dump(); + stack_trace(); + loop { + interrupt::disable(); + interrupt::halt(); + } +}); + +interrupt_error!(invalid_tss, |stack| { + println!("Invalid TSS fault"); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 10, + code: stack.code, + ..Default::default() + }); +}); + +interrupt_error!(segment_not_present, |stack| { + println!("Segment not present fault"); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 11, + code: stack.code, + ..Default::default() + }); +}); + +interrupt_error!(stack_segment, |stack| { + println!("Stack segment fault"); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 12, + code: stack.code, + ..Default::default() + }); +}); + +interrupt_error!(protection, |stack| { + println!("Protection fault"); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 13, + code: stack.code, + ..Default::default() + }); +}); + +interrupt_error!(page, |stack| { + let cr2 = VirtualAddress::new(unsafe { x86::controlregs::cr2() }); + let arch_flags = PageFaultError::from_bits_truncate(stack.code as u32); + let mut generic_flags = GenericPfFlags::empty(); + + generic_flags.set( + GenericPfFlags::PRESENT, + arch_flags.contains(PageFaultError::P), + ); + generic_flags.set( + GenericPfFlags::INVOLVED_WRITE, + arch_flags.contains(PageFaultError::WR), + ); + generic_flags.set( + GenericPfFlags::USER_NOT_SUPERVISOR, + arch_flags.contains(PageFaultError::US), + ); + generic_flags.set( + GenericPfFlags::INVL, + arch_flags.contains(PageFaultError::RSVD), + ); + generic_flags.set( + GenericPfFlags::INSTR_NOT_DATA, + arch_flags.contains(PageFaultError::ID), + ); + + if crate::memory::page_fault_handler(&mut stack.inner, generic_flags, cr2).is_err() { + println!("Page fault: {:>08X} {:#?}", cr2.data(), arch_flags); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 14, + code: stack.code, + address: cr2.data(), + }); + } +}); + +interrupt_stack!(fpu_fault, |stack| { + println!("FPU floating point fault"); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 16, + ..Default::default() + }); +}); + 
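// Reference for the error code decoded by the `page` handler above: the CPU pushes a code
// whose low bits are P (bit 0: protection violation on a present page vs. a non-present
// page), W/R (bit 1: the faulting access was a write), U/S (bit 2: the access came from
// user mode), RSVD (bit 3: a reserved bit was set in a paging-structure entry) and I/D
// (bit 4: the access was an instruction fetch), while CR2 holds the faulting linear
// address. For example, a user-mode write to an unmapped page pushes 0b110, which the
// translation above turns into INVOLVED_WRITE | USER_NOT_SUPERVISOR with PRESENT clear.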
+interrupt_error!(alignment_check, |stack| { + println!("Alignment check fault"); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 17, + code: stack.code, + ..Default::default() + }); +}); + +interrupt_stack!(machine_check, @paranoid, |stack| { + println!("Machine check fault"); + stack.dump(); + stack_trace(); + loop { + interrupt::disable(); + interrupt::halt(); + } +}); + +interrupt_stack!(simd, |stack| { + println!("SIMD floating point fault"); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 19, + ..Default::default() + }); +}); + +interrupt_stack!(virtualization, |stack| { + println!("Virtualization fault"); + stack.dump(); + stack_trace(); + loop { + interrupt::disable(); + interrupt::halt(); + } +}); + +interrupt_error!(security, |stack| { + println!("Security exception"); + stack.dump(); + stack_trace(); + loop { + interrupt::disable(); + interrupt::halt(); + } +}); diff --git a/src/arch/x86/interrupt/handler.rs b/src/arch/x86/interrupt/handler.rs new file mode 100644 index 00000000..973298e7 --- /dev/null +++ b/src/arch/x86/interrupt/handler.rs @@ -0,0 +1,457 @@ +use core::mem; + +use crate::{memory::ArchIntCtx, syscall::IntRegisters}; + +use super::super::flags::*; + +#[derive(Default)] +#[repr(C, packed)] +pub struct ScratchRegisters { + pub edx: usize, + pub ecx: usize, + pub eax: usize, +} + +impl ScratchRegisters { + pub fn dump(&self) { + println!("EAX: {:08x}", { self.eax }); + println!("ECX: {:08x}", { self.ecx }); + println!("EDX: {:08x}", { self.edx }); + } +} + +#[derive(Default)] +#[repr(C, packed)] +pub struct PreservedRegisters { + pub ebp: usize, + pub esi: usize, + pub edi: usize, + pub ebx: usize, +} + +impl PreservedRegisters { + pub fn dump(&self) { + println!("EBX: {:08x}", { self.ebx }); + println!("EDI: {:08x}", { self.edi }); + println!("ESI: {:08x}", { self.esi }); + println!("EBP: {:08x}", { self.ebp }); + } +} + +#[derive(Default)] +#[repr(C, packed)] +pub struct IretRegisters { + pub eip: usize, + pub cs: usize, + pub eflags: usize, + + // ---- + // The following will only be present if interrupt is raised from another + // privilege ring. Otherwise, they are undefined values. + // ---- + pub esp: usize, + pub ss: usize, +} + +impl IretRegisters { + pub fn dump(&self) { + println!("EFLAG: {:08x}", { self.eflags }); + println!("CS: {:08x}", { self.cs }); + println!("EIP: {:08x}", { self.eip }); + + if self.cs & 0b11 != 0b00 { + println!("ESP: {:08x}", { self.esp }); + println!("SS: {:08x}", { self.ss }); + } + } +} + +#[derive(Default)] +#[repr(C, packed)] +pub struct InterruptStack { + pub gs: usize, + pub preserved: PreservedRegisters, + pub scratch: ScratchRegisters, + pub iret: IretRegisters, +} + +impl InterruptStack { + pub fn init(&mut self) { + // Always enable interrupts! + self.iret.eflags = x86::bits32::eflags::EFlags::FLAGS_IF.bits() as usize; + self.iret.ss = (crate::gdt::GDT_USER_DATA << 3) | 3; + self.iret.cs = (crate::gdt::GDT_USER_CODE << 3) | 3; + self.gs = (crate::gdt::GDT_USER_GS << 3) | 3; + } + pub fn dump(&self) { + self.iret.dump(); + self.scratch.dump(); + self.preserved.dump(); + } + /// Saves all registers to a struct used by the proc: + /// scheme to read/write registers. 
+ pub fn save(&self, all: &mut IntRegisters) { + all.ebp = self.preserved.ebp; + all.esi = self.preserved.esi; + all.edi = self.preserved.edi; + all.ebx = self.preserved.ebx; + all.edx = self.scratch.edx; + all.ecx = self.scratch.ecx; + all.eax = self.scratch.eax; + all.eip = self.iret.eip; + all.cs = self.iret.cs; + all.eflags = self.iret.eflags; + + // Set esp and ss: + + const CPL_MASK: usize = 0b11; + + let cs: usize; + unsafe { + core::arch::asm!("mov {}, cs", out(reg) cs); + } + + if self.iret.cs & CPL_MASK == cs & CPL_MASK { + // Privilege ring didn't change, so neither did the stack + all.esp = self as *const Self as usize // esp after Self was pushed to the stack + + mem::size_of::() // disregard Self + - mem::size_of::() * 2; // well, almost: esp and ss need to be excluded as they aren't present + unsafe { + core::arch::asm!("mov {}, ss", out(reg) all.ss); + } + } else { + all.esp = self.iret.esp; + all.ss = self.iret.ss; + } + } + pub fn set_stack_pointer(&mut self, esp: usize) { + self.iret.esp = esp; + } + pub fn instr_pointer(&self) -> usize { + self.iret.eip + } + pub fn sig_archdep_reg(&self) -> usize { + self.iret.eflags + } + pub fn set_instr_pointer(&mut self, eip: usize) { + self.iret.eip = eip; + } + /// Loads all registers from a struct used by the proc: + /// scheme to read/write registers. + pub fn load(&mut self, all: &IntRegisters) { + // TODO: Which of these should be allowed to change? + + self.preserved.ebp = all.ebp; + self.preserved.esi = all.esi; + self.preserved.edi = all.edi; + self.preserved.ebx = all.ebx; + self.scratch.edx = all.edx; + self.scratch.ecx = all.ecx; + self.scratch.eax = all.eax; + self.iret.eip = all.eip; + + // FIXME: The interrupt stack on which this is called, is always from userspace, but make + // the API safer. + self.iret.esp = all.esp; + + // OF, DF, 0, TF => D + // SF, ZF, 0, AF => D + // 0, PF, 1, CF => 5 + const ALLOWED_EFLAGS: usize = 0xDD5; + + // These should probably be restricted + // self.iret.cs = all.cs; + self.iret.eflags &= !ALLOWED_EFLAGS; + self.iret.eflags |= all.eflags & ALLOWED_EFLAGS; + } + /// Enables the "Trap Flag" in the FLAGS register, causing the CPU + /// to send a Debug exception after the next instruction. This is + /// used for singlestep in the proc: scheme. + pub fn set_singlestep(&mut self, enabled: bool) { + if enabled { + self.iret.eflags |= FLAG_SINGLESTEP; + } else { + self.iret.eflags &= !FLAG_SINGLESTEP; + } + } +} + +#[derive(Default)] +#[repr(C, packed)] +pub struct InterruptErrorStack { + pub code: usize, + pub inner: InterruptStack, +} + +impl InterruptErrorStack { + pub fn dump(&self) { + println!("CODE: {:08x}", { self.code }); + self.inner.dump(); + } +} + +#[macro_export] +macro_rules! push_scratch { + () => { + " + // Push scratch registers (minus eax) + push ecx + push edx + " + }; +} +#[macro_export] +macro_rules! pop_scratch { + () => { + " + // Pop scratch registers + pop edx + pop ecx + pop eax + " + }; +} + +#[macro_export] +macro_rules! push_preserved { + () => { + " + // Push preserved registers + push ebx + push edi + push esi + push ebp + " + }; +} +#[macro_export] +macro_rules! pop_preserved { + () => { + " + // Pop preserved registers + pop ebp + pop esi + pop edi + pop ebx + " + }; +} + +// Must always happen after push_scratch +macro_rules! enter_gs { + () => { + " + // Enter kernel GS segment + mov ecx, gs + push ecx + mov ecx, 0x18 + mov gs, ecx + " + }; +} + +// Must always happen before pop_scratch +macro_rules! 
exit_gs { + () => { + " + // Exit kernel GS segment + pop ecx + mov gs, ecx + " + }; +} + +#[macro_export] +macro_rules! interrupt_stack { + // XXX: Apparently we cannot use $expr and check for bool exhaustiveness, so we will have to + // use idents directly instead. + ($name:ident, |$stack:ident| $code:block) => { + #[naked] + pub unsafe extern "C" fn $name() { + unsafe extern "fastcall" fn inner($stack: &mut $crate::arch::x86::interrupt::InterruptStack) { + // TODO: Force the declarations to specify unsafe? + + #[allow(unused_unsafe)] + unsafe { + $code + } + } + core::arch::naked_asm!(concat!( + // Backup all userspace registers to stack + "push eax\n", + push_scratch!(), + push_preserved!(), + + // Enter kernel TLS segment + enter_gs!(), + + // TODO: Map PTI + // $crate::arch::x86::pti::map(); + + // Call inner function with pointer to stack + " + mov ecx, esp + call {inner} + ", + + // TODO: Unmap PTI + // $crate::arch::x86::pti::unmap(); + + // Exit kernel TLS segment + exit_gs!(), + + // Restore all userspace registers + pop_preserved!(), + pop_scratch!(), + + "iretd\n", + ), + inner = sym inner, + ); + } + }; + ($name:ident, |$stack:ident| $code:block) => { interrupt_stack!($name, |$stack| $code); }; + ($name:ident, @paranoid, |$stack:ident| $code:block) => { interrupt_stack!($name, |$stack| $code); } +} + +#[macro_export] +macro_rules! interrupt { + ($name:ident, || $code:block) => { + #[naked] + pub unsafe extern "C" fn $name() { + unsafe extern "C" fn inner() { + $code + } + + core::arch::naked_asm!(concat!( + // Backup all userspace registers to stack + "push eax\n", + push_scratch!(), + + // Enter kernel TLS segment + enter_gs!(), + + // TODO: Map PTI + // $crate::arch::x86::pti::map(); + + // Call inner function with pointer to stack + "call {inner}\n", + + // TODO: Unmap PTI + // $crate::arch::x86::pti::unmap(); + + // Exit kernel TLS segment + exit_gs!(), + + // Restore all userspace registers + pop_scratch!(), + + "iretd\n", + ), + inner = sym inner, + ); + } + }; +} + +#[macro_export] +macro_rules! interrupt_error { + ($name:ident, |$stack:ident| $code:block) => { + #[naked] + pub unsafe extern "C" fn $name() { + unsafe extern "C" fn inner($stack: &mut $crate::arch::x86::interrupt::handler::InterruptErrorStack) { + #[allow(unused_unsafe)] + unsafe { + $code + } + } + + core::arch::naked_asm!(concat!( + // Move eax into code's place, put code in last instead (to be + // compatible with InterruptStack) + "xchg [esp], eax\n", + + // Push all userspace registers + push_scratch!(), + push_preserved!(), + + // Enter kernel TLS segment + enter_gs!(), + + // Put code in, it's now in eax + "push eax\n", + + // TODO: Map PTI + // $crate::arch::x86::pti::map(); + + // Call inner function with pointer to stack + " + push esp + call {inner} + ", + // add esp, 4 + + // TODO: Unmap PTI (split "add esp, 8" into two "add esp, 4"s maybe?) + // $crate::arch::x86::pti::unmap(); + + // Pop previous esp and code + "add esp, 8\n", + + // Exit kernel TLS segment + exit_gs!(), + + // Restore all userspace registers + pop_preserved!(), + pop_scratch!(), + + // The error code has already been popped, so use the regular macro. 
+ "iretd\n", + ), + inner = sym inner); + } + }; +} +#[naked] +unsafe extern "C" fn usercopy_trampoline() { + core::arch::naked_asm!( + " + pop esi + pop edi + + mov eax, 1 + ret + " + ); +} + +impl ArchIntCtx for InterruptStack { + fn ip(&self) -> usize { + self.iret.eip + } + fn recover_and_efault(&mut self) { + // Unlike on x86_64, Protected Mode interrupts will not save/restore esp and ss unless + // privilege rings changed, which they won't here as we are catching a kernel-induced page + // fault. + // + // Thus, it is only possible to change scratch/preserved registers, and EIP. While it may + // be feasible to set ECX to zero to stop the REP MOVSB, or increase EIP by 2 (REP MOVSB is + // f3 a4, i.e. 2 bytes), this trampoline allows any memcpy implementation, that reasonably + // pushes preserved registers to the stack. + self.iret.eip = usercopy_trampoline as usize; + } +} + +#[naked] +pub unsafe extern "C" fn enter_usermode() { + core::arch::naked_asm!(concat!( + // TODO: Unmap PTI + // $crate::arch::x86::pti::unmap(); + + // Exit kernel TLS segment + exit_gs!(), + // Restore all userspace registers + pop_preserved!(), + pop_scratch!(), + "iretd\n", + )) +} diff --git a/src/arch/x86/interrupt/irq.rs b/src/arch/x86/interrupt/irq.rs new file mode 100644 index 00000000..0345299e --- /dev/null +++ b/src/arch/x86/interrupt/irq.rs @@ -0,0 +1,308 @@ +use core::sync::atomic::{AtomicUsize, Ordering}; + +use alloc::vec::Vec; + +use crate::{ + context, + context::timeout, + device::{ + ioapic, local_apic, pic, pit, + serial::{COM1, COM2}, + }, + interrupt, interrupt_stack, + ipi::{ipi, IpiKind, IpiTarget}, + scheme::{ + debug::{debug_input, debug_notify}, + serio::serio_input, + }, + time, +}; + +#[cfg(feature = "sys_stat")] +use crate::percpu::PercpuBlock; + +#[repr(u8)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum IrqMethod { + Pic = 0, + Apic = 1, +} + +static SPURIOUS_COUNT_IRQ7: AtomicUsize = AtomicUsize::new(0); +static SPURIOUS_COUNT_IRQ15: AtomicUsize = AtomicUsize::new(0); + +pub fn spurious_count_irq7() -> usize { + SPURIOUS_COUNT_IRQ7.load(Ordering::Relaxed) +} +pub fn spurious_count_irq15() -> usize { + SPURIOUS_COUNT_IRQ15.load(Ordering::Relaxed) +} +pub fn spurious_count() -> usize { + spurious_count_irq7() + spurious_count_irq15() +} +pub fn spurious_irq_resource() -> syscall::Result> { + match irq_method() { + IrqMethod::Apic => Ok(Vec::from(&b"(not implemented for APIC yet)"[..])), + IrqMethod::Pic => Ok(format!( + "{}\tIRQ7\n{}\tIRQ15\n{}\ttotal\n", + spurious_count_irq7(), + spurious_count_irq15(), + spurious_count() + ) + .into_bytes()), + } +} + +static IRQ_METHOD: AtomicUsize = AtomicUsize::new(IrqMethod::Pic as usize); + +pub fn set_irq_method(method: IrqMethod) { + IRQ_METHOD.store(method as usize, core::sync::atomic::Ordering::Release); +} + +fn irq_method() -> IrqMethod { + let raw = IRQ_METHOD.load(core::sync::atomic::Ordering::Acquire); + + match raw { + 0 => IrqMethod::Pic, + 1 => IrqMethod::Apic, + _ => unreachable!(), + } +} + +extern "C" { + // triggers irq scheme + fn irq_trigger(irq: u8); +} + +/// Notify the IRQ scheme that an IRQ has been registered. This should mask the IRQ until the +/// scheme user unmasks it ("acknowledges" it). +unsafe fn trigger(irq: u8) { + match irq_method() { + IrqMethod::Pic => { + if irq < 16 { + pic_mask(irq) + } + } + IrqMethod::Apic => ioapic_mask(irq), + } + irq_trigger(irq); +} + +/// Unmask the IRQ. This is called from the IRQ scheme, which does this when a user process has +/// processed the IRQ. 
+pub unsafe fn acknowledge(irq: usize) { + match irq_method() { + IrqMethod::Pic => { + if irq < 16 { + pic_unmask(irq) + } + } + IrqMethod::Apic => ioapic_unmask(irq), + } +} + +/// Sends an end-of-interrupt, so that the interrupt controller can go on to the next one. +pub unsafe fn eoi(irq: u8) { + #[cfg(feature = "sys_stat")] + PercpuBlock::current().stats.add_irq(irq); + + match irq_method() { + IrqMethod::Pic => { + if irq < 16 { + pic_eoi(irq) + } + } + IrqMethod::Apic => lapic_eoi(), + } +} + +unsafe fn pic_mask(irq: u8) { + debug_assert!(irq < 16); + + if irq >= 8 { + pic::slave().mask_set(irq - 8); + } else { + pic::master().mask_set(irq); + } +} + +unsafe fn ioapic_mask(irq: u8) { + ioapic::mask(irq); +} + +unsafe fn pic_eoi(irq: u8) { + debug_assert!(irq < 16); + + if irq >= 8 { + pic::master().ack(); + pic::slave().ack(); + } else { + pic::master().ack(); + } +} + +unsafe fn lapic_eoi() { + local_apic::the_local_apic().eoi() +} + +unsafe fn pic_unmask(irq: usize) { + debug_assert!(irq < 16); + + if irq >= 8 { + pic::slave().mask_clear(irq as u8 - 8); + } else { + pic::master().mask_clear(irq as u8); + } +} + +unsafe fn ioapic_unmask(irq: usize) { + ioapic::unmask(irq as u8); +} + +interrupt_stack!(pit_stack, |_stack| { + // Saves CPU time by not sending IRQ event irq_trigger(0); + + { + *time::OFFSET.lock() += pit::RATE; + } + + eoi(0); + + // Wake up other CPUs + ipi(IpiKind::Pit, IpiTarget::Other); + + // Any better way of doing this? + timeout::trigger(); + + // Switch after a sufficient amount of time since the last switch. + context::switch::tick(); +}); + +interrupt!(keyboard, || { + let data: u8; + core::arch::asm!("in al, 0x60", out("al") data); + + eoi(1); + + serio_input(0, data); +}); + +interrupt!(cascade, || { + // No need to do any operations on cascade + eoi(2); +}); + +interrupt!(com2, || { + while let Some(c) = COM2.lock().receive() { + debug_input(c); + } + debug_notify(); + eoi(3); +}); + +interrupt!(com1, || { + while let Some(c) = COM1.lock().receive() { + debug_input(c); + } + debug_notify(); + eoi(4); +}); + +interrupt!(lpt2, || { + trigger(5); + eoi(5); +}); + +interrupt!(floppy, || { + trigger(6); + eoi(6); +}); + +interrupt!(lpt1, || { + if irq_method() == IrqMethod::Pic && pic::master().isr() & (1 << 7) == 0 { + // the IRQ was spurious, ignore it but increment a counter. + SPURIOUS_COUNT_IRQ7.fetch_add(1, Ordering::Relaxed); + return; + } + trigger(7); + eoi(7); +}); + +interrupt!(rtc, || { + trigger(8); + eoi(8); +}); + +interrupt!(pci1, || { + trigger(9); + eoi(9); +}); + +interrupt!(pci2, || { + trigger(10); + eoi(10); +}); + +interrupt!(pci3, || { + trigger(11); + eoi(11); +}); + +interrupt!(mouse, || { + let data: u8; + core::arch::asm!("in al, 0x60", out("al") data); + + eoi(12); + + serio_input(1, data); +}); + +interrupt!(fpu, || { + trigger(13); + eoi(13); +}); + +interrupt!(ata1, || { + trigger(14); + eoi(14); +}); + +interrupt!(ata2, || { + if irq_method() == IrqMethod::Pic && pic::slave().isr() & (1 << 7) == 0 { + SPURIOUS_COUNT_IRQ15.fetch_add(1, Ordering::Relaxed); + pic::master().ack(); + return; + } + trigger(15); + eoi(15); +}); + +interrupt!(lapic_timer, || { + println!("Local apic timer interrupt"); + lapic_eoi(); +}); + +interrupt!(lapic_error, || { + println!( + "Local apic internal error: ESR={:#0x}", + local_apic::the_local_apic().esr() + ); + lapic_eoi(); +}); + +// XXX: This would look way prettier using const generics. + +macro_rules! 
allocatable_irq( + ( $idt:expr, $number:literal, $name:ident ) => { + interrupt!($name, || { + allocatable_irq_generic($number); + }); + } +); + +pub unsafe fn allocatable_irq_generic(number: u8) { + irq_trigger(number - 32); + lapic_eoi(); +} + +define_default_irqs!(); diff --git a/src/arch/x86/interrupt/mod.rs b/src/arch/x86/interrupt/mod.rs new file mode 100644 index 00000000..430961dd --- /dev/null +++ b/src/arch/x86/interrupt/mod.rs @@ -0,0 +1,12 @@ +//! Interrupt instructions + +pub use crate::arch::x86_shared::interrupt::*; + +#[macro_use] +pub mod handler; + +pub mod exception; +pub mod irq; +pub mod syscall; + +pub use self::handler::InterruptStack; diff --git a/src/arch/x86/interrupt/syscall.rs b/src/arch/x86/interrupt/syscall.rs new file mode 100644 index 00000000..d020daef --- /dev/null +++ b/src/arch/x86/interrupt/syscall.rs @@ -0,0 +1,42 @@ +use crate::{ + ptrace, syscall, + syscall::flag::{PTRACE_FLAG_IGNORE, PTRACE_STOP_POST_SYSCALL, PTRACE_STOP_PRE_SYSCALL}, +}; + +pub unsafe fn init() {} + +macro_rules! with_interrupt_stack { + (|$stack:ident| $code:block) => {{ + let allowed = ptrace::breakpoint_callback(PTRACE_STOP_PRE_SYSCALL, None) + .and_then(|_| ptrace::next_breakpoint().map(|f| !f.contains(PTRACE_FLAG_IGNORE))); + + if allowed.unwrap_or(true) { + // If the syscall is `clone`, the clone won't return here. Instead, + // it'll return early and leave any undropped values. This is + // actually GOOD, because any references are at that point UB + // anyway, because they are based on the wrong stack. + let $stack = &mut *$stack; + $code + } + + ptrace::breakpoint_callback(PTRACE_STOP_POST_SYSCALL, None); + }}; +} + +interrupt_stack!(syscall, |stack| { + with_interrupt_stack!(|stack| { + let scratch = &stack.scratch; + let preserved = &stack.preserved; + let ret = syscall::syscall( + scratch.eax, + preserved.ebx, + scratch.ecx, + scratch.edx, + preserved.esi, + preserved.edi, + ); + stack.scratch.eax = ret; + }) +}); + +pub use super::handler::enter_usermode; diff --git a/src/arch/x86/macros.rs b/src/arch/x86/macros.rs new file mode 100644 index 00000000..5f888cb0 --- /dev/null +++ b/src/arch/x86/macros.rs @@ -0,0 +1,69 @@ +/// Print to console +#[macro_export] +macro_rules! print { + ($($arg:tt)*) => ({ + use core::fmt::Write; + let _ = write!($crate::arch::debug::Writer::new(), $($arg)*); + }); +} + +/// Print with new line to console +#[macro_export] +macro_rules! println { + () => (print!("\n")); + ($fmt:expr) => (print!(concat!($fmt, "\n"))); + ($fmt:expr, $($arg:tt)*) => (print!(concat!($fmt, "\n"), $($arg)*)); +} + +#[macro_export] +macro_rules! irqs( + ( [ $( ($idt:expr, $number:literal, $name:ident) ,)* ], $submac:ident ) => { + $( + $submac!($idt, $number, $name); + )* + } +); + +// define the irq numbers specified in the list above, as functions of the names +// allocatable_irq_NUM. +#[macro_export] +macro_rules! default_irqs( + ($idt:expr, $submac:ident) => { + irqs!([ + // interrupt vectors below 32 are exceptions + // vectors 32..=47 are used for standard 8259 pic irqs. + // 48 and 49 are used for the local APIC timer and error register, respectively. 
+ ($idt, 50, irq_50), ($idt, 51, irq_51), ($idt, 52, irq_52), ($idt, 53, irq_53), ($idt, 54, irq_54), ($idt, 55, irq_55), ($idt, 56, irq_56), ($idt, 57, irq_57), ($idt, 58, irq_58), ($idt, 59, irq_59), + ($idt, 60, irq_60), ($idt, 61, irq_61), ($idt, 62, irq_62), ($idt, 63, irq_63), + // 64..=67 used for IPI + ($idt, 68, irq_68), ($idt, 69, irq_69), + ($idt, 70, irq_70), ($idt, 71, irq_71), ($idt, 72, irq_72), ($idt, 73, irq_73), ($idt, 74, irq_74), ($idt, 75, irq_75), ($idt, 76, irq_76), ($idt, 77, irq_77), ($idt, 78, irq_78), ($idt, 79, irq_79), + ($idt, 80, irq_80), ($idt, 81, irq_81), ($idt, 82, irq_82), ($idt, 83, irq_83), ($idt, 84, irq_84), ($idt, 85, irq_85), ($idt, 86, irq_86), ($idt, 87, irq_87), ($idt, 88, irq_88), ($idt, 89, irq_89), + ($idt, 90, irq_90), ($idt, 91, irq_91), ($idt, 92, irq_92), ($idt, 93, irq_93), ($idt, 94, irq_94), ($idt, 95, irq_95), ($idt, 96, irq_96), ($idt, 97, irq_97), ($idt, 98, irq_98), ($idt, 99, irq_99), + ($idt, 100, irq_100), ($idt, 101, irq_101), ($idt, 102, irq_102), ($idt, 103, irq_103), ($idt, 104, irq_104), ($idt, 105, irq_105), ($idt, 106, irq_106), ($idt, 107, irq_107), ($idt, 108, irq_108), ($idt, 109, irq_109), + ($idt, 110, irq_110), ($idt, 111, irq_111), ($idt, 112, irq_112), ($idt, 113, irq_113), ($idt, 114, irq_114), ($idt, 115, irq_115), ($idt, 116, irq_116), ($idt, 117, irq_117), ($idt, 118, irq_118), ($idt, 119, irq_119), + ($idt, 120, irq_120), ($idt, 121, irq_121), ($idt, 122, irq_122), ($idt, 123, irq_123), ($idt, 124, irq_124), ($idt, 125, irq_125), ($idt, 126, irq_126), ($idt, 127, irq_127), + // 128 is used for software interrupts + ($idt, 129, irq_129), + ($idt, 130, irq_130), ($idt, 131, irq_131), ($idt, 132, irq_132), ($idt, 133, irq_133), ($idt, 134, irq_134), ($idt, 135, irq_135), ($idt, 136, irq_136), ($idt, 137, irq_137), ($idt, 138, irq_138), ($idt, 139, irq_139), + ($idt, 140, irq_140), ($idt, 141, irq_141), ($idt, 142, irq_142), ($idt, 143, irq_143), ($idt, 144, irq_144), ($idt, 145, irq_145), ($idt, 146, irq_146), ($idt, 147, irq_147), ($idt, 148, irq_148), ($idt, 149, irq_149), + ($idt, 150, irq_150), ($idt, 151, irq_151), ($idt, 152, irq_152), ($idt, 153, irq_153), ($idt, 154, irq_154), ($idt, 155, irq_155), ($idt, 156, irq_156), ($idt, 157, irq_157), ($idt, 158, irq_158), ($idt, 159, irq_159), + ($idt, 160, irq_160), ($idt, 161, irq_161), ($idt, 162, irq_162), ($idt, 163, irq_163), ($idt, 164, irq_164), ($idt, 165, irq_165), ($idt, 166, irq_166), ($idt, 167, irq_167), ($idt, 168, irq_168), ($idt, 169, irq_169), + ($idt, 170, irq_170), ($idt, 171, irq_171), ($idt, 172, irq_172), ($idt, 173, irq_173), ($idt, 174, irq_174), ($idt, 175, irq_175), ($idt, 176, irq_176), ($idt, 177, irq_177), ($idt, 178, irq_178), ($idt, 179, irq_179), + ($idt, 180, irq_180), ($idt, 181, irq_181), ($idt, 182, irq_182), ($idt, 183, irq_183), ($idt, 184, irq_184), ($idt, 185, irq_185), ($idt, 186, irq_186), ($idt, 187, irq_187), ($idt, 188, irq_188), ($idt, 189, irq_189), + ($idt, 190, irq_190), ($idt, 191, irq_191), ($idt, 192, irq_192), ($idt, 193, irq_193), ($idt, 194, irq_194), ($idt, 195, irq_195), ($idt, 196, irq_196), ($idt, 197, irq_197), ($idt, 198, irq_198), ($idt, 199, irq_199), + ($idt, 200, irq_200), ($idt, 201, irq_201), ($idt, 202, irq_202), ($idt, 203, irq_203), ($idt, 204, irq_204), ($idt, 205, irq_205), ($idt, 206, irq_206), ($idt, 207, irq_207), ($idt, 208, irq_208), ($idt, 209, irq_209), + ($idt, 210, irq_210), ($idt, 211, irq_211), ($idt, 212, irq_212), ($idt, 213, irq_213), ($idt, 214, irq_214), ($idt, 215, irq_215), 
($idt, 216, irq_216), ($idt, 217, irq_217), ($idt, 218, irq_218), ($idt, 219, irq_219), + ($idt, 220, irq_220), ($idt, 221, irq_221), ($idt, 222, irq_222), ($idt, 223, irq_223), ($idt, 224, irq_224), ($idt, 225, irq_225), ($idt, 226, irq_226), ($idt, 227, irq_227), ($idt, 228, irq_228), ($idt, 229, irq_229), + ($idt, 230, irq_230), ($idt, 231, irq_231), ($idt, 232, irq_232), ($idt, 233, irq_233), ($idt, 234, irq_234), ($idt, 235, irq_235), ($idt, 236, irq_236), ($idt, 237, irq_237), ($idt, 238, irq_238), ($idt, 239, irq_239), + ($idt, 240, irq_240), ($idt, 241, irq_241), ($idt, 242, irq_242), ($idt, 243, irq_243), ($idt, 244, irq_244), ($idt, 245, irq_245), ($idt, 246, irq_246), ($idt, 247, irq_247), ($idt, 248, irq_248), ($idt, 249, irq_249), + ($idt, 250, irq_250), ($idt, 251, irq_251), ($idt, 252, irq_252), ($idt, 253, irq_253), ($idt, 254, irq_254), ($idt, 255, irq_255), + ], $submac); + } +); + +macro_rules! define_default_irqs( + () => { + default_irqs!((), allocatable_irq); + } +); diff --git a/src/arch/x86/mod.rs b/src/arch/x86/mod.rs new file mode 100644 index 00000000..a3f78082 --- /dev/null +++ b/src/arch/x86/mod.rs @@ -0,0 +1,60 @@ +pub use crate::arch::x86_shared::*; + +#[macro_use] +pub mod macros; + +/// Constants like memory locations +pub mod consts; + +/// Global descriptor table +pub mod gdt; + +/// Interrupt instructions +#[macro_use] +pub mod interrupt; + +/// Paging +pub mod paging; + +pub mod rmm; + +/// Initialization and start function +pub mod start; + +pub use ::rmm::X86Arch as CurrentRmmArch; + +// Flags +pub mod flags { + pub const SHIFT_SINGLESTEP: usize = 8; + pub const FLAG_SINGLESTEP: usize = 1 << SHIFT_SINGLESTEP; +} + +#[naked] +#[link_section = ".usercopy-fns"] +pub unsafe extern "C" fn arch_copy_to_user(dst: usize, src: usize, len: usize) -> u8 { + core::arch::naked_asm!( + " + push edi + push esi + + mov edi, [esp + 12] # dst + mov esi, [esp + 16] # src + mov ecx, [esp + 20] # len + rep movsb + + pop esi + pop edi + + xor eax, eax + ret + " + ); +} +pub use arch_copy_to_user as arch_copy_from_user; + +pub const KFX_SIZE: usize = 512; + +// This function exists as the KFX size is dynamic on x86_64. +pub fn kfx_size() -> usize { + KFX_SIZE +} diff --git a/src/arch/x86/paging/mapper.rs b/src/arch/x86/paging/mapper.rs new file mode 100644 index 00000000..73dd4893 --- /dev/null +++ b/src/arch/x86/paging/mapper.rs @@ -0,0 +1,22 @@ +use crate::ipi::{ipi, IpiKind, IpiTarget}; + +use super::RmmA; + +pub use rmm::{Flusher, PageFlush, PageFlushAll}; + +pub struct InactiveFlusher { + _inner: (), +} +impl Flusher for InactiveFlusher { + fn consume(&mut self, flush: PageFlush) { + // TODO: Push to TLB "mailbox" or tell it to reload CR3 if there are too many entries. + unsafe { + flush.ignore(); + } + } +} +impl Drop for InactiveFlusher { + fn drop(&mut self) { + ipi(IpiKind::Tlb, IpiTarget::Other); + } +} diff --git a/src/arch/x86/paging/mod.rs b/src/arch/x86/paging/mod.rs new file mode 100644 index 00000000..d2f96622 --- /dev/null +++ b/src/arch/x86/paging/mod.rs @@ -0,0 +1,130 @@ +//! # Paging +//! Some code was borrowed from [Phil Opp's Blog](http://os.phil-opp.com/modifying-page-tables.html) + +use x86::msr; + +pub use super::CurrentRmmArch as RmmA; +pub use rmm::{Arch as RmmArch, PageFlags, PhysicalAddress, TableKind, VirtualAddress}; + +pub type PageMapper = rmm::PageMapper; + +pub mod entry { + bitflags! 
{ + pub struct EntryFlags: usize { + const NO_CACHE = 1 << 4; + const HUGE_PAGE = 1 << 7; + const GLOBAL = 1 << 8; + const DEV_MEM = 0; + } + } +} + +pub mod mapper; + +/// Size of pages +pub const PAGE_SIZE: usize = RmmA::PAGE_SIZE; +pub const PAGE_MASK: usize = RmmA::PAGE_OFFSET_MASK; + +/// Setup page attribute table +#[cold] +unsafe fn init_pat() { + let uncacheable = 0; + let write_combining = 1; + let write_through = 4; + //let write_protected = 5; + let write_back = 6; + let uncached = 7; + + let pat0 = write_back; + let pat1 = write_through; + let pat2 = uncached; + let pat3 = uncacheable; + + let pat4 = write_combining; + let pat5 = pat1; + let pat6 = pat2; + let pat7 = pat3; + + msr::wrmsr( + msr::IA32_PAT, + pat7 << 56 + | pat6 << 48 + | pat5 << 40 + | pat4 << 32 + | pat3 << 24 + | pat2 << 16 + | pat1 << 8 + | pat0, + ); +} + +#[cold] +pub unsafe fn init() { + init_pat(); +} + +/// Page +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Page { + number: usize, +} + +impl Page { + pub fn start_address(self) -> VirtualAddress { + VirtualAddress::new(self.number * PAGE_SIZE) + } + + pub fn containing_address(address: VirtualAddress) -> Page { + //TODO assert!(address.data() < 0x0000_8000_0000_0000 || address.data() >= 0xffff_8000_0000_0000, + // "invalid address: 0x{:x}", address.data()); + Page { + number: address.data() / PAGE_SIZE, + } + } + + pub fn range_inclusive(start: Page, r#final: Page) -> PageIter { + PageIter { + start, + end: r#final.next(), + } + } + pub fn next(self) -> Page { + self.next_by(1) + } + pub fn next_by(self, n: usize) -> Page { + Self { + number: self.number + n, + } + } + pub fn offset_from(self, other: Self) -> usize { + self.number - other.number + } +} + +pub struct PageIter { + start: Page, + end: Page, +} + +impl Iterator for PageIter { + type Item = Page; + + fn next(&mut self) -> Option { + if self.start < self.end { + let page = self.start; + self.start = self.start.next(); + Some(page) + } else { + None + } + } +} + +/// Round down to the nearest multiple of page size +pub fn round_down_pages(number: usize) -> usize { + number - number % PAGE_SIZE +} +/// Round up to the nearest multiple of page size +pub fn round_up_pages(number: usize) -> usize { + round_down_pages(number + PAGE_SIZE - 1) +} diff --git a/src/arch/x86/rmm.rs b/src/arch/x86/rmm.rs new file mode 100644 index 00000000..453382c4 --- /dev/null +++ b/src/arch/x86/rmm.rs @@ -0,0 +1,17 @@ +use rmm::{Arch, PageFlags, VirtualAddress}; + +pub unsafe fn page_flags(virt: VirtualAddress) -> PageFlags { + use crate::kernel_executable_offsets::*; + let virt_addr = virt.data(); + + if virt_addr >= __text_start() && virt_addr < __text_end() { + // Remap text read-only, execute + PageFlags::new().execute(true) + } else if virt_addr >= __rodata_start() && virt_addr < __rodata_end() { + // Remap rodata read-only, no execute + PageFlags::new() + } else { + // Remap everything else read-write, no execute + PageFlags::new().write(true) + } +} diff --git a/src/arch/x86/start.rs b/src/arch/x86/start.rs new file mode 100644 index 00000000..3dcfda36 --- /dev/null +++ b/src/arch/x86/start.rs @@ -0,0 +1,289 @@ +/// This function is where the kernel sets up IRQ handlers +/// It is increcibly unsafe, and should be minimal in nature +/// It must create the IDT with the correct entries, those entries are +/// defined in other files inside of the `arch` module +use core::slice; +use core::sync::atomic::{AtomicBool, AtomicU32, AtomicUsize, Ordering}; +use log::info; + +#[cfg(feature = 
"acpi")] +use crate::acpi; +#[cfg(feature = "graphical_debug")] +use crate::devices::graphical_debug; +use crate::{ + allocator, + cpu_set::LogicalCpuId, + device, gdt, idt, interrupt, + paging::{self, PhysicalAddress, RmmA, RmmArch, TableKind}, + startup::memory::{register_bootloader_areas, register_memory_region, BootloaderMemoryKind}, +}; + +/// Test of zero values in BSS. +static mut BSS_TEST_ZERO: usize = 0; +/// Test of non-zero values in data. +static mut DATA_TEST_NONZERO: usize = usize::max_value(); + +pub static KERNEL_BASE: AtomicUsize = AtomicUsize::new(0); +pub static KERNEL_SIZE: AtomicUsize = AtomicUsize::new(0); + +// TODO: This probably shouldn't be an atomic. Only the BSP starts APs. +pub static CPU_COUNT: AtomicU32 = AtomicU32::new(0); + +pub static AP_READY: AtomicBool = AtomicBool::new(false); +static BSP_READY: AtomicBool = AtomicBool::new(false); + +#[repr(C, packed(8))] +pub struct KernelArgs { + kernel_base: u64, + kernel_size: u64, + stack_base: u64, + stack_size: u64, + env_base: u64, + env_size: u64, + + /// The base pointer to the saved RSDP. + /// + /// This field can be NULL, and if so, the system has not booted with UEFI or in some other way + /// retrieved the RSDPs. The kernel or a userspace driver will thus try searching the BIOS + /// memory instead. On UEFI systems, searching is not guaranteed to actually work though. + acpi_rsdp_base: u64, + /// The size of the RSDP region. + acpi_rsdp_size: u64, + + areas_base: u64, + areas_size: u64, + + /// The physical base 64-bit pointer to the contiguous bootstrap/initfs. + bootstrap_base: u64, + /// Size of contiguous bootstrap/initfs physical region, not necessarily page aligned. + bootstrap_size: u64, +} + +/// The entry to Rust, all things must be initialized +#[no_mangle] +pub unsafe extern "C" fn kstart(args_ptr: *const KernelArgs) -> ! 
{ + let bootstrap = { + let args = args_ptr.read(); + + // BSS should already be zero + { + assert_eq!(BSS_TEST_ZERO, 0); + assert_eq!(DATA_TEST_NONZERO, usize::max_value()); + } + + KERNEL_BASE.store(args.kernel_base as usize, Ordering::SeqCst); + KERNEL_SIZE.store(args.kernel_size as usize, Ordering::SeqCst); + + // Convert env to slice + let env = slice::from_raw_parts( + (args.env_base as usize + crate::PHYS_OFFSET) as *const u8, + args.env_size as usize, + ); + + // Set up serial debug + #[cfg(feature = "serial_debug")] + device::serial::init(); + + // Set up graphical debug + #[cfg(feature = "graphical_debug")] + graphical_debug::init(env); + + #[cfg(feature = "system76_ec_debug")] + device::system76_ec::init(); + + // Initialize logger + crate::log::init_logger(|r| { + use core::fmt::Write; + let _ = writeln!( + super::debug::Writer::new(), + "{}:{} -- {}", + r.target(), + r.level(), + r.args() + ); + }); + + info!("Redox OS starting..."); + info!( + "Kernel: {:X}:{:X}", + { args.kernel_base }, + { args.kernel_base } + { args.kernel_size } + ); + info!( + "Stack: {:X}:{:X}", + { args.stack_base }, + { args.stack_base } + { args.stack_size } + ); + info!( + "Env: {:X}:{:X}", + { args.env_base }, + { args.env_base } + { args.env_size } + ); + info!( + "RSDP: {:X}:{:X}", + { args.acpi_rsdp_base }, + { args.acpi_rsdp_base } + { args.acpi_rsdp_size } + ); + info!( + "Areas: {:X}:{:X}", + { args.areas_base }, + { args.areas_base } + { args.areas_size } + ); + info!( + "Bootstrap: {:X}:{:X}", + { args.bootstrap_base }, + { args.bootstrap_base } + { args.bootstrap_size } + ); + + // Set up GDT before paging + gdt::init(); + + // Set up IDT before paging + idt::init(); + + // Initialize RMM + register_bootloader_areas(args.areas_base as usize, args.areas_size as usize); + + register_memory_region( + args.kernel_base as usize, + args.kernel_size as usize, + BootloaderMemoryKind::Kernel, + ); + register_memory_region( + args.stack_base as usize, + args.stack_size as usize, + BootloaderMemoryKind::IdentityMap, + ); + register_memory_region( + args.env_base as usize, + args.env_size as usize, + BootloaderMemoryKind::IdentityMap, + ); + register_memory_region( + args.acpi_rsdp_base as usize, + args.acpi_rsdp_size as usize, + BootloaderMemoryKind::IdentityMap, + ); + register_memory_region( + args.bootstrap_base as usize, + args.bootstrap_size as usize, + BootloaderMemoryKind::IdentityMap, + ); + crate::startup::memory::init(Some(0x100000), Some(0x40000000)); + + // Initialize paging + paging::init(); + + // Set up GDT after paging with TLS + gdt::init_paging( + args.stack_base as usize + args.stack_size as usize, + LogicalCpuId::BSP, + ); + + // Set up IDT + idt::init_paging_bsp(); + + // Set up syscall instruction + interrupt::syscall::init(); + + // Reset AP variables + CPU_COUNT.store(1, Ordering::SeqCst); + AP_READY.store(false, Ordering::SeqCst); + BSP_READY.store(false, Ordering::SeqCst); + + // Setup kernel heap + allocator::init(); + + // Set up double buffer for graphical debug now that heap is available + #[cfg(feature = "graphical_debug")] + graphical_debug::init_heap(); + + idt::init_paging_post_heap(LogicalCpuId::BSP); + + // Activate memory logging + crate::log::init(); + + // Initialize devices + device::init(); + + // Read ACPI tables, starts APs + #[cfg(feature = "acpi")] + { + acpi::init(if args.acpi_rsdp_base != 0 { + Some((args.acpi_rsdp_base as usize + crate::PHYS_OFFSET) as *const u8) + } else { + None + }); + device::init_after_acpi(); + } + + // Initialize all of the 
non-core devices not otherwise needed to complete initialization + device::init_noncore(); + + BSP_READY.store(true, Ordering::SeqCst); + + crate::Bootstrap { + base: crate::memory::Frame::containing(crate::paging::PhysicalAddress::new( + args.bootstrap_base as usize, + )), + page_count: (args.bootstrap_size as usize) / crate::memory::PAGE_SIZE, + env, + } + }; + + crate::kmain(CPU_COUNT.load(Ordering::SeqCst), bootstrap); +} + +#[repr(C, packed)] +pub struct KernelArgsAp { + cpu_id: u64, + page_table: u64, + stack_start: u64, + stack_end: u64, +} + +/// Entry to rust for an AP +pub unsafe extern "C" fn kstart_ap(args_ptr: *const KernelArgsAp) -> ! { + let cpu_id = { + let args = &*args_ptr; + let cpu_id = LogicalCpuId::new(args.cpu_id as u32); + let bsp_table = args.page_table as usize; + let _stack_start = args.stack_start as usize; + let stack_end = args.stack_end as usize; + + assert_eq!(BSS_TEST_ZERO, 0); + assert_eq!(DATA_TEST_NONZERO, usize::max_value()); + + // Set up GDT before paging + gdt::init(); + + // Set up IDT before paging + idt::init(); + + // Initialize paging + RmmA::set_table(TableKind::Kernel, PhysicalAddress::new(bsp_table)); + paging::init(); + + // Set up GDT with TLS + gdt::init_paging(stack_end, cpu_id); + + // Set up IDT for AP + idt::init_paging_post_heap(cpu_id); + + // Set up syscall instruction + interrupt::syscall::init(); + + // Initialize devices (for AP) + device::init_ap(); + + AP_READY.store(true, Ordering::SeqCst); + + cpu_id + }; + + while !BSP_READY.load(Ordering::SeqCst) { + interrupt::pause(); + } + + crate::kmain_ap(cpu_id); +} diff --git a/src/arch/x86_64/alternative.rs b/src/arch/x86_64/alternative.rs new file mode 100644 index 00000000..ee59b30a --- /dev/null +++ b/src/arch/x86_64/alternative.rs @@ -0,0 +1,300 @@ +#![allow(unused_imports)] + +use core::mem::size_of; + +use spin::Once; +use x86::controlregs::{Cr4, Xcr0}; + +use crate::{ + context::memory::PageSpan, + cpuid::{cpuid, feature_info, has_ext_feat}, + memory::KernelMapper, + paging::{Page, PageFlags, VirtualAddress, PAGE_SIZE}, +}; + +#[cfg(all(cpu_feature_never = "xsave", not(cpu_feature_never = "xsaveopt")))] +compile_error!("cannot force-disable xsave without force-disabling xsaveopt"); + +#[repr(C)] +#[derive(Clone, Copy, Debug)] +pub struct AltReloc { + // These two fields point to a utf-8 name of the feature, see the match statement below. + pub name_start: *const u8, + pub name_len: usize, + + // Base address of the code that may later be overwritten. + pub code_start: *mut u8, + // Length of the default code, excluding NOPs if the altcode sequence is longer. + pub origcode_len: usize, + // Actual length of the overwritable code, i.e. max(origcode_len, altcode_len). 
+ pub padded_len: usize, + pub _rsvd: usize, + + // These two fields point to the alternative code (in .rodata), and possible new nop bytes, + // that will replace the code_start..+padded_len + pub altcode_start: *const u8, + pub altcode_len: usize, +} + +#[cold] +pub unsafe fn early_init(bsp: bool) { + let relocs_offset = crate::kernel_executable_offsets::__altrelocs_start(); + let relocs_size = crate::kernel_executable_offsets::__altrelocs_end() - relocs_offset; + + assert_eq!(relocs_size % size_of::(), 0); + let relocs = core::slice::from_raw_parts( + relocs_offset as *const AltReloc, + relocs_size / size_of::(), + ); + + let mut enable = KcpuFeatures::empty(); + + if cfg!(not(cpu_feature_never = "smap")) && has_ext_feat(|feat| feat.has_smap()) { + // SMAP (Supervisor-Mode Access Prevention) forbids the kernel from accessing any + // userspace-accessible pages, with the necessary exception of when RFLAGS.AC = 1. This + // limits user-memory accesses to the UserSlice wrapper, so that no data outside of + // usercopy functions can be accidentally accessed by the kernel. + x86::controlregs::cr4_write(x86::controlregs::cr4() | Cr4::CR4_ENABLE_SMAP); + // Clear CLAC in (the probably unlikely) case the bootloader set it earlier. + x86::bits64::rflags::clac(); + + enable |= KcpuFeatures::SMAP; + } else { + assert!(cfg!(not(cpu_feature_always = "smap"))); + } + + if cfg!(not(cpu_feature_never = "fsgsbase")) + && let Some(f) = cpuid().get_extended_feature_info() + && f.has_fsgsbase() + { + x86::controlregs::cr4_write( + x86::controlregs::cr4() | x86::controlregs::Cr4::CR4_ENABLE_FSGSBASE, + ); + + enable |= KcpuFeatures::FSGSBASE; + } else { + assert!(cfg!(not(cpu_feature_always = "fsgsbase"))); + } + + #[cfg(not(cpu_feature_never = "xsave"))] + if feature_info().has_xsave() { + use raw_cpuid::{ExtendedRegisterStateLocation, ExtendedRegisterType}; + + x86::controlregs::cr4_write( + x86::controlregs::cr4() | x86::controlregs::Cr4::CR4_ENABLE_OS_XSAVE, + ); + + let mut xcr0 = Xcr0::XCR0_FPU_MMX_STATE | Xcr0::XCR0_SSE_STATE; + x86::controlregs::xcr0_write(xcr0); + let ext_state_info = cpuid() + .get_extended_state_info() + .expect("must be present if XSAVE is supported"); + + enable |= KcpuFeatures::XSAVE; + enable.set(KcpuFeatures::XSAVEOPT, ext_state_info.has_xsaveopt()); + + let info = xsave::XsaveInfo { + ymm_upper_offset: feature_info().has_avx().then(|| { + xcr0 |= Xcr0::XCR0_AVX_STATE; + x86::controlregs::xcr0_write(xcr0); + + let state = ext_state_info + .iter() + .find(|state| { + state.register() == ExtendedRegisterType::Avx + && state.location() == ExtendedRegisterStateLocation::Xcr0 + }) + .expect("CPUID said AVX was supported but there's no state info"); + + if state.size() as usize != 16 * core::mem::size_of::() { + log::warn!("Unusual AVX state size {}", state.size()); + } + + state.offset() + }), + xsave_size: ext_state_info.xsave_area_size_enabled_features(), + }; + log::debug!("XSAVE: {:?}", info); + + xsave::XSAVE_INFO.call_once(|| info); + } else { + assert!(cfg!(not(cpu_feature_always = "xsave"))); + } + + if !bsp { + return; + } + + #[cfg(feature = "self_modifying")] + overwrite(&relocs, enable); + + #[cfg(not(feature = "self_modifying"))] + let _ = relocs; + + if cfg!(not(feature = "self_modifying")) { + assert!( + cfg!(not(cpu_feature_auto = "smap")) + && cfg!(not(cpu_feature_auto = "fsgsbase")) + && cfg!(not(cpu_feature_auto = "xsave")) + && cfg!(not(cpu_feature_auto = "xsaveopt")) + ); + } + + FEATURES.call_once(|| enable); +} + +#[cfg(feature = "self_modifying")] +unsafe 
fn overwrite(relocs: &[AltReloc], enable: KcpuFeatures) { + log::info!("self-modifying features: {:?}", enable); + + let mut mapper = KernelMapper::lock(); + for reloc in relocs.iter().copied() { + let name = core::str::from_utf8(core::slice::from_raw_parts( + reloc.name_start, + reloc.name_len, + )) + .expect("invalid feature name"); + let altcode = core::slice::from_raw_parts(reloc.altcode_start, reloc.altcode_len); + + let dst_pages = PageSpan::between( + Page::containing_address(VirtualAddress::new(reloc.code_start as usize)), + Page::containing_address(VirtualAddress::new( + (reloc.code_start as usize + reloc.padded_len).next_multiple_of(PAGE_SIZE), + )), + ); + for page in dst_pages.pages() { + mapper + .get_mut() + .unwrap() + .remap( + page.start_address(), + PageFlags::new().write(true).execute(true).global(true), + ) + .unwrap() + .flush(); + } + + let code = core::slice::from_raw_parts_mut(reloc.code_start, reloc.padded_len); + + log::trace!( + "feature {} current {:x?} altcode {:x?}", + name, + code, + altcode + ); + + let feature_is_enabled = match name { + "smap" => enable.contains(KcpuFeatures::SMAP), + "fsgsbase" => enable.contains(KcpuFeatures::FSGSBASE), + "xsave" => enable.contains(KcpuFeatures::XSAVE), + "xsaveopt" => enable.contains(KcpuFeatures::XSAVEOPT), + //_ => panic!("unknown altcode relocation: {}", name), + _ => true, + }; + + // XXX: The `.nops` directive only works for constant lengths, and the variable `.skip -X` + // only outputs the (slower) single-byte 0x90 NOP. + + // This table is from the "Software Optimization Guide for AMD Family 19h Processors" (November + // 2020). + const NOPS_TABLE: [&[u8]; 11] = [ + &[0x90], + &[0x66, 0x90], + &[0x0f, 0x1f, 0x00], + &[0x0f, 0x1f, 0x40, 0x00], + &[0x0f, 0x1f, 0x44, 0x00, 0x00], + &[0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00], + &[0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00], + &[0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00], + &[0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00], + &[0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00], + &[ + 0x66, 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, + ], + ]; + + if feature_is_enabled { + log::trace!("feature {} origcode {:x?}", name, code); + let (dst, dst_nops) = code.split_at_mut(altcode.len()); + dst.copy_from_slice(altcode); + + for chunk in dst_nops.chunks_mut(NOPS_TABLE.len()) { + chunk.copy_from_slice(NOPS_TABLE[chunk.len() - 1]); + } + log::trace!("feature {} new {:x?} altcode {:x?}", name, code, altcode); + } else { + log::trace!("feature !{} origcode {:x?}", name, code); + let (_, padded) = code.split_at_mut(reloc.origcode_len); + + // Not strictly necessary, but reduces the number of instructions using longer nop + // instructions. + for chunk in padded.chunks_mut(NOPS_TABLE.len()) { + chunk.copy_from_slice(NOPS_TABLE[chunk.len() - 1]); + } + + log::trace!("feature !{} new {:x?}", name, code); + } + + for page in dst_pages.pages() { + mapper + .get_mut() + .unwrap() + .remap( + page.start_address(), + PageFlags::new().write(false).execute(true).global(true), + ) + .unwrap() + .flush(); + } + } +} + +bitflags! 
{ + #[derive(Clone, Copy, Debug)] + pub struct KcpuFeatures: usize { + const SMAP = 1; + const FSGSBASE = 2; + const XSAVE = 4; + const XSAVEOPT = 8; + } +} + +static FEATURES: Once = Once::new(); + +pub fn features() -> KcpuFeatures { + *FEATURES.get().expect("early_cpu_init was not called") +} + +#[cfg(not(cpu_feature_never = "xsave"))] +mod xsave { + use super::*; + + #[derive(Debug)] + pub struct XsaveInfo { + pub ymm_upper_offset: Option, + pub xsave_size: u32, + } + pub(super) static XSAVE_INFO: Once = Once::new(); + + pub fn info() -> Option<&'static XsaveInfo> { + XSAVE_INFO.get() + } +} + +pub fn kfx_size() -> usize { + #[cfg(not(cpu_feature_never = "xsave"))] + { + match xsave::info() { + Some(info) => FXSAVE_SIZE + XSAVE_HEADER_SIZE + info.xsave_size as usize, + None => FXSAVE_SIZE, + } + } + #[cfg(cpu_feature_never = "xsave")] + { + // FXSAVE size + FXSAVE_SIZE + } +} + +pub const FXSAVE_SIZE: usize = 512; +pub const XSAVE_HEADER_SIZE: usize = 64; diff --git a/src/arch/x86_64/consts.rs b/src/arch/x86_64/consts.rs new file mode 100644 index 00000000..fda5ed1b --- /dev/null +++ b/src/arch/x86_64/consts.rs @@ -0,0 +1,34 @@ +// Because the memory map is so important to not be aliased, it is defined here, in one place. +// +// - The lower half (256 PML4 entries; 128 TiB) is reserved for userspace. These mappings are +// associated with _address spaces_, and change when context switching, unless the address spaces +// match. +// - The upper half is reserved for the kernel. Kernel mappings are preserved across context +// switches. +// +// Each PML4 entry references 512 GiB of virtual memory. + +/// The size of a single PML4 +pub const PML4_SIZE: usize = 0x0000_0080_0000_0000; +pub const PML4_MASK: usize = 0x0000_ff80_0000_0000; + +/// Offset of kernel +pub const KERNEL_MAX_SIZE: usize = 1_usize << 31; +pub const KERNEL_OFFSET: usize = KERNEL_MAX_SIZE.wrapping_neg(); +pub const KERNEL_PML4: usize = (KERNEL_OFFSET & PML4_MASK) / PML4_SIZE; + +/// Offset to kernel heap +pub const KERNEL_HEAP_OFFSET: usize = KERNEL_OFFSET - PML4_SIZE; +pub const KERNEL_HEAP_PML4: usize = (KERNEL_HEAP_OFFSET & PML4_MASK) / PML4_SIZE; +/// Size of kernel heap +pub const KERNEL_HEAP_SIZE: usize = 1 * 1024 * 1024; // 1 MB + +/// Offset of physmap +// This needs to match RMM's PHYS_OFFSET +pub const PHYS_OFFSET: usize = 0xFFFF_8000_0000_0000; +pub const PHYS_PML4: usize = (PHYS_OFFSET & PML4_MASK) / PML4_SIZE; + +/// End offset of the user image, i.e. kernel start +// TODO: Make this offset at least PAGE_SIZE less? There are known hardware bugs on some arches, +// for example on x86 if instructions execute near the 48-bit canonical address boundary. 
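// For reference, the constants above and below evaluate to:
//   PML4_SIZE          = 2^39 bytes             = 512 GiB covered per PML4 entry
//   USER_END_OFFSET    = 256 * PML4_SIZE = 2^47 = 0x0000_8000_0000_0000 (entire lower half)
//   PHYS_OFFSET        = 0xFFFF_8000_0000_0000  -> PML4 entry 256, first entry of the upper half
//   KERNEL_HEAP_OFFSET = 0xFFFF_FF7F_8000_0000  -> PML4 entry 510
//   KERNEL_OFFSET      = 0xFFFF_FFFF_8000_0000  -> PML4 entry 511, the top 2 GiB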
+pub const USER_END_OFFSET: usize = 256 * PML4_SIZE; diff --git a/src/arch/x86_64/cpuid.rs b/src/arch/x86_64/cpuid.rs new file mode 100644 index 00000000..0901c7ab --- /dev/null +++ b/src/arch/x86_64/cpuid.rs @@ -0,0 +1,13 @@ +use raw_cpuid::{ExtendedFeatures, FeatureInfo}; + +pub use crate::arch::x86_shared::cpuid::*; + +pub fn feature_info() -> FeatureInfo { + cpuid() + .get_feature_info() + .expect("x86_64 requires CPUID leaf=0x01 to be present") +} + +pub fn has_ext_feat(feat: impl FnOnce(ExtendedFeatures) -> bool) -> bool { + cpuid().get_extended_feature_info().map_or(false, feat) +} diff --git a/src/arch/x86_64/device/cpu.rs b/src/arch/x86_64/device/cpu.rs deleted file mode 100644 index 516ebfe2..00000000 --- a/src/arch/x86_64/device/cpu.rs +++ /dev/null @@ -1,126 +0,0 @@ -extern crate raw_cpuid; - -use core::fmt::{Result, Write}; - -use self::raw_cpuid::CpuId; - -pub fn cpu_info(w: &mut W) -> Result { - let cpuid = CpuId::new(); - - if let Some(info) = cpuid.get_vendor_info() { - write!(w, "Vendor: {}\n", info.as_string())?; - } - - if let Some(info) = cpuid.get_extended_function_info() { - if let Some(brand) = info.processor_brand_string() { - write!(w, "Model: {}\n", brand)?; - } - } - - if let Some(info) = cpuid.get_processor_frequency_info() { - write!(w, "CPU Base MHz: {}\n", info.processor_base_frequency())?; - write!(w, "CPU Max MHz: {}\n", info.processor_max_frequency())?; - write!(w, "Bus MHz: {}\n", info.bus_frequency())?; - } - - write!(w, "Features:")?; - - if let Some(info) = cpuid.get_feature_info() { - if info.has_fpu() { write!(w, " fpu")? }; - if info.has_vme() { write!(w, " vme")? }; - if info.has_de() { write!(w, " de")? }; - if info.has_pse() { write!(w, " pse")? }; - if info.has_tsc() { write!(w, " tsc")? }; - if info.has_msr() { write!(w, " msr")? }; - if info.has_pae() { write!(w, " pae")? }; - if info.has_mce() { write!(w, " mce")? }; - - if info.has_cmpxchg8b() { write!(w, " cx8")? }; - if info.has_apic() { write!(w, " apic")? }; - if info.has_sysenter_sysexit() { write!(w, " sep")? }; - if info.has_mtrr() { write!(w, " mtrr")? }; - if info.has_pge() { write!(w, " pge")? }; - if info.has_mca() { write!(w, " mca")? }; - if info.has_cmov() { write!(w, " cmov")? }; - if info.has_pat() { write!(w, " pat")? }; - - if info.has_pse36() { write!(w, " pse36")? }; - if info.has_psn() { write!(w, " psn")? }; - if info.has_clflush() { write!(w, " clflush")? }; - if info.has_ds() { write!(w, " ds")? }; - if info.has_acpi() { write!(w, " acpi")? }; - if info.has_mmx() { write!(w, " mmx")? }; - if info.has_fxsave_fxstor() { write!(w, " fxsr")? }; - if info.has_sse() { write!(w, " sse")? }; - - if info.has_sse2() { write!(w, " sse2")? }; - if info.has_ss() { write!(w, " ss")? }; - if info.has_htt() { write!(w, " ht")? }; - if info.has_tm() { write!(w, " tm")? }; - if info.has_pbe() { write!(w, " pbe")? }; - - if info.has_sse3() { write!(w, " sse3")? }; - if info.has_pclmulqdq() { write!(w, " pclmulqdq")? }; - if info.has_ds_area() { write!(w, " dtes64")? }; - if info.has_monitor_mwait() { write!(w, " monitor")? }; - if info.has_cpl() { write!(w, " ds_cpl")? }; - if info.has_vmx() { write!(w, " vmx")? }; - if info.has_smx() { write!(w, " smx")? }; - if info.has_eist() { write!(w, " est")? }; - - if info.has_tm2() { write!(w, " tm2")? }; - if info.has_ssse3() { write!(w, " ssse3")? }; - if info.has_cnxtid() { write!(w, " cnxtid")? }; - if info.has_fma() { write!(w, " fma")? }; - if info.has_cmpxchg16b() { write!(w, " cx16")? }; - if info.has_pdcm() { write!(w, " pdcm")? 
}; - if info.has_pcid() { write!(w, " pcid")? }; - if info.has_dca() { write!(w, " dca")? }; - - if info.has_sse41() { write!(w, " sse4_1")? }; - if info.has_sse42() { write!(w, " sse4_2")? }; - if info.has_x2apic() { write!(w, " x2apic")? }; - if info.has_movbe() { write!(w, " movbe")? }; - if info.has_popcnt() { write!(w, " popcnt")? }; - if info.has_tsc_deadline() { write!(w, " tsc_deadline_timer")? }; - if info.has_aesni() { write!(w, " aes")? }; - if info.has_xsave() { write!(w, " xsave")? }; - - if info.has_oxsave() { write!(w, " xsaveopt")? }; - if info.has_avx() { write!(w, " avx")? }; - if info.has_f16c() { write!(w, " f16c")? }; - if info.has_rdrand() { write!(w, " rdrand")? }; - } - - if let Some(info) = cpuid.get_extended_function_info() { - if info.has_64bit_mode() { write!(w, " lm")? }; - if info.has_rdtscp() { write!(w, " rdtscp")? }; - if info.has_1gib_pages() { write!(w, " pdpe1gb")? }; - if info.has_execute_disable() { write!(w, " nx")? }; - if info.has_syscall_sysret() { write!(w, " syscall")? }; - if info.has_prefetchw() { write!(w, " prefetchw")? }; - if info.has_lzcnt() { write!(w, " lzcnt")? }; - if info.has_lahf_sahf() { write!(w, " lahf_lm")? }; - if info.has_invariant_tsc() { write!(w, " constant_tsc")? }; - } - - if let Some(info) = cpuid.get_extended_feature_info() { - if info.has_fsgsbase() { write!(w, " fsgsbase")? }; - if info.has_tsc_adjust_msr() { write!(w, " tsc_adjust")? }; - if info.has_bmi1() { write!(w, " bmi1")? }; - if info.has_hle() { write!(w, " hle")? }; - if info.has_avx2() { write!(w, " avx2")? }; - if info.has_smep() { write!(w, " smep")? }; - if info.has_bmi2() { write!(w, " bmi2")? }; - if info.has_rep_movsb_stosb() { write!(w, " erms")? }; - if info.has_invpcid() { write!(w, " invpcid")? }; - if info.has_rtm() { write!(w, " rtm")? }; - //if info.has_qm() { write!(w, " qm")? }; - if info.has_fpu_cs_ds_deprecated() { write!(w, " fpu_seg")? }; - if info.has_mpx() { write!(w, " mpx")? 
}; - } - - write!(w, "\n")?; - - Ok(()) -} diff --git a/src/arch/x86_64/device/hpet.rs b/src/arch/x86_64/device/hpet.rs deleted file mode 100644 index 6f2c69bc..00000000 --- a/src/arch/x86_64/device/hpet.rs +++ /dev/null @@ -1,49 +0,0 @@ -use acpi::hpet::Hpet; - -static LEG_RT_CNF: u64 = 2; -static ENABLE_CNF: u64 = 1; - -static TN_VAL_SET_CNF: u64 = 0x40; -static TN_TYPE_CNF: u64 = 0x08; -static TN_INT_ENB_CNF: u64 = 0x04; - -static CAPABILITY_OFFSET: usize = 0x00; -static GENERAL_CONFIG_OFFSET: usize = 0x10; -// static GENERAL_INTERRUPT_OFFSET: usize = 0x20; -// static MAIN_COUNTER_OFFSET: usize = 0xF0; -// static NUM_TIMER_CAP_MASK: u64 = 0x0f00; -static LEG_RT_CAP: u64 = 0x8000; -static T0_CONFIG_CAPABILITY_OFFSET: usize = 0x100; -static T0_COMPARATOR_OFFSET: usize = 0x108; - -static PER_INT_CAP: u64 = 0x10; - -pub unsafe fn init(hpet: &mut Hpet) -> bool { - let capability = hpet.base_address.read_u64(CAPABILITY_OFFSET); - if capability & LEG_RT_CAP == 0 { - return false; - } - - let counter_clk_period_fs = capability >> 32; - let desired_fs_period: u64 = 2_250_286 * 1_000_000; - - let clk_periods_per_kernel_tick: u64 = desired_fs_period / counter_clk_period_fs; - - let t0_capabilities = hpet.base_address.read_u64(T0_CONFIG_CAPABILITY_OFFSET); - if t0_capabilities & PER_INT_CAP == 0 { - return false; - } - - let t0_config_word: u64 = TN_VAL_SET_CNF | TN_TYPE_CNF | TN_INT_ENB_CNF; - hpet.base_address.write_u64(T0_CONFIG_CAPABILITY_OFFSET, t0_config_word); - hpet.base_address.write_u64(T0_COMPARATOR_OFFSET, clk_periods_per_kernel_tick); - // set accumulator value - hpet.base_address.write_u64(T0_COMPARATOR_OFFSET, clk_periods_per_kernel_tick); - // set interval - - let enable_word: u64 = hpet.base_address.read_u64(GENERAL_CONFIG_OFFSET) | LEG_RT_CNF | ENABLE_CNF; - hpet.base_address.write_u64(GENERAL_CONFIG_OFFSET, enable_word); - // Enable interrupts from the HPET - - true -} diff --git a/src/arch/x86_64/device/local_apic.rs b/src/arch/x86_64/device/local_apic.rs deleted file mode 100644 index b08956ec..00000000 --- a/src/arch/x86_64/device/local_apic.rs +++ /dev/null @@ -1,116 +0,0 @@ -use core::intrinsics::{volatile_load, volatile_store}; -use x86::shared::cpuid::CpuId; -use x86::shared::msr::*; - -use crate::memory::Frame; -use crate::paging::{ActivePageTable, PhysicalAddress, Page, VirtualAddress}; -use crate::paging::entry::EntryFlags; - -pub static mut LOCAL_APIC: LocalApic = LocalApic { - address: 0, - x2: false -}; - -pub unsafe fn init(active_table: &mut ActivePageTable) { - LOCAL_APIC.init(active_table); -} - -pub unsafe fn init_ap() { - LOCAL_APIC.init_ap(); -} - -/// Local APIC -pub struct LocalApic { - pub address: usize, - pub x2: bool -} - -impl LocalApic { - unsafe fn init(&mut self, active_table: &mut ActivePageTable) { - self.address = (rdmsr(IA32_APIC_BASE) as usize & 0xFFFF_0000) + crate::KERNEL_OFFSET; - self.x2 = CpuId::new().get_feature_info().unwrap().has_x2apic(); - - if ! 
self.x2 { - let page = Page::containing_address(VirtualAddress::new(self.address)); - let frame = Frame::containing_address(PhysicalAddress::new(self.address - crate::KERNEL_OFFSET)); - let result = active_table.map_to(page, frame, EntryFlags::PRESENT | EntryFlags::WRITABLE | EntryFlags::NO_EXECUTE); - result.flush(active_table); - } - - self.init_ap(); - } - - unsafe fn init_ap(&mut self) { - if self.x2 { - wrmsr(IA32_APIC_BASE, rdmsr(IA32_APIC_BASE) | 1 << 10); - wrmsr(IA32_X2APIC_SIVR, 0x100); - } else { - self.write(0xF0, 0x100); - } - } - - unsafe fn read(&self, reg: u32) -> u32 { - volatile_load((self.address + reg as usize) as *const u32) - } - - unsafe fn write(&mut self, reg: u32, value: u32) { - volatile_store((self.address + reg as usize) as *mut u32, value); - } - - pub fn id(&self) -> u32 { - if self.x2 { - unsafe { rdmsr(IA32_X2APIC_APICID) as u32 } - } else { - unsafe { self.read(0x20) } - } - } - - pub fn version(&self) -> u32 { - if self.x2 { - unsafe { rdmsr(IA32_X2APIC_VERSION) as u32 } - } else { - unsafe { self.read(0x30) } - } - } - - pub fn icr(&self) -> u64 { - if self.x2 { - unsafe { rdmsr(IA32_X2APIC_ICR) } - } else { - unsafe { - (self.read(0x310) as u64) << 32 | self.read(0x300) as u64 - } - } - } - - pub fn set_icr(&mut self, value: u64) { - if self.x2 { - unsafe { wrmsr(IA32_X2APIC_ICR, value); } - } else { - unsafe { - while self.read(0x300) & 1 << 12 == 1 << 12 {} - self.write(0x310, (value >> 32) as u32); - self.write(0x300, value as u32); - while self.read(0x300) & 1 << 12 == 1 << 12 {} - } - } - } - - pub fn ipi(&mut self, apic_id: usize) { - let mut icr = 0x4040; - if self.x2 { - icr |= (apic_id as u64) << 32; - } else { - icr |= (apic_id as u64) << 56; - } - self.set_icr(icr); - } - - pub unsafe fn eoi(&mut self) { - if self.x2 { - wrmsr(IA32_X2APIC_EOI, 0); - } else { - self.write(0xB0, 0); - } - } -} diff --git a/src/arch/x86_64/device/mod.rs b/src/arch/x86_64/device/mod.rs deleted file mode 100644 index 6c01ad84..00000000 --- a/src/arch/x86_64/device/mod.rs +++ /dev/null @@ -1,43 +0,0 @@ -use crate::paging::ActivePageTable; - -pub mod cpu; -pub mod local_apic; -pub mod pic; -pub mod pit; -pub mod rtc; -pub mod serial; -#[cfg(feature = "acpi")] -pub mod hpet; - -pub unsafe fn init(active_table: &mut ActivePageTable){ - pic::init(); - local_apic::init(active_table); -} - -#[cfg(feature = "acpi")] -unsafe fn init_hpet() -> bool { - use acpi::ACPI_TABLE; - if let Some(ref mut hpet) = *ACPI_TABLE.hpet.write() { - hpet::init(hpet) - } else { - false - } -} - -#[cfg(not(feature = "acpi"))] -unsafe fn init_hpet() -> bool { - false -} - -pub unsafe fn init_noncore() { - if ! 
init_hpet() { - pit::init(); - } - - rtc::init(); - serial::init(); -} - -pub unsafe fn init_ap() { - local_apic::init_ap(); -} diff --git a/src/arch/x86_64/device/pic.rs b/src/arch/x86_64/device/pic.rs deleted file mode 100644 index 88f57097..00000000 --- a/src/arch/x86_64/device/pic.rs +++ /dev/null @@ -1,64 +0,0 @@ -use crate::syscall::io::{Io, Pio}; - -pub static mut MASTER: Pic = Pic::new(0x20); -pub static mut SLAVE: Pic = Pic::new(0xA0); - -pub unsafe fn init() { - // Start initialization - MASTER.cmd.write(0x11); - SLAVE.cmd.write(0x11); - - // Set offsets - MASTER.data.write(0x20); - SLAVE.data.write(0x28); - - // Set up cascade - MASTER.data.write(4); - SLAVE.data.write(2); - - // Set up interrupt mode (1 is 8086/88 mode, 2 is auto EOI) - MASTER.data.write(1); - SLAVE.data.write(1); - - // Unmask interrupts - MASTER.data.write(0); - SLAVE.data.write(0); - - // Ack remaining interrupts - MASTER.ack(); - SLAVE.ack(); -} - -pub struct Pic { - cmd: Pio, - data: Pio, -} - -impl Pic { - pub const fn new(port: u16) -> Pic { - Pic { - cmd: Pio::new(port), - data: Pio::new(port + 1), - } - } - - pub fn ack(&mut self) { - self.cmd.write(0x20); - } - - pub fn mask_set(&mut self, irq: u8) { - assert!(irq < 8); - - let mut mask = self.data.read(); - mask |= 1 << irq; - self.data.write(mask); - } - - pub fn mask_clear(&mut self, irq: u8) { - assert!(irq < 8); - - let mut mask = self.data.read(); - mask &= !(1 << irq); - self.data.write(mask); - } -} diff --git a/src/arch/x86_64/device/pit.rs b/src/arch/x86_64/device/pit.rs deleted file mode 100644 index 0ca7b4c9..00000000 --- a/src/arch/x86_64/device/pit.rs +++ /dev/null @@ -1,19 +0,0 @@ -use crate::syscall::io::{Io, Pio}; - -pub static mut CHAN0: Pio = Pio::new(0x40); -pub static mut CHAN1: Pio = Pio::new(0x41); -pub static mut CHAN2: Pio = Pio::new(0x42); -pub static mut COMMAND: Pio = Pio::new(0x43); - -static SELECT_CHAN0: u8 = 0; -static LOHI: u8 = 0x30; - -static CHAN0_DIVISOR: u16 = 2685; - -pub unsafe fn init() { - COMMAND.write(SELECT_CHAN0 | LOHI | 5); - CHAN0.write((CHAN0_DIVISOR & 0xFF) as u8); - CHAN0.write((CHAN0_DIVISOR >> 8) as u8); - - println!("Using PIT"); -} diff --git a/src/arch/x86_64/device/rtc.rs b/src/arch/x86_64/device/rtc.rs deleted file mode 100644 index 6c20f67a..00000000 --- a/src/arch/x86_64/device/rtc.rs +++ /dev/null @@ -1,148 +0,0 @@ -use crate::syscall::io::{Io, Pio}; -use crate::time; - -pub fn init() { - let mut rtc = Rtc::new(); - time::START.lock().0 = rtc.time(); -} - -fn cvt_bcd(value: usize) -> usize { - (value & 0xF) + ((value / 16) * 10) -} - -/// RTC -pub struct Rtc { - addr: Pio, - data: Pio, - nmi: bool, -} - -impl Rtc { - /// Create new empty RTC - pub fn new() -> Self { - Rtc { - addr: Pio::::new(0x70), - data: Pio::::new(0x71), - nmi: false, - } - } - - /// Read - unsafe fn read(&mut self, reg: u8) -> u8 { - if self.nmi { - self.addr.write(reg & 0x7F); - } else { - self.addr.write(reg | 0x80); - } - self.data.read() - } - - /// Write - #[allow(dead_code)] - unsafe fn write(&mut self, reg: u8, value: u8) { - if self.nmi { - self.addr.write(reg & 0x7F); - } else { - self.addr.write(reg | 0x80); - } - self.data.write(value); - } - - /// Wait for an update, can take one second if full is specified! 
- unsafe fn wait(&mut self, full: bool) { - if full { - while self.read(0xA) & 0x80 != 0x80 {} - } - while self.read(0xA) & 0x80 == 0x80 {} - } - - /// Get time without waiting - pub unsafe fn time_no_wait(&mut self) -> u64 { - /*let century_register = if let Some(ref fadt) = acpi::ACPI_TABLE.lock().fadt { - Some(fadt.century) - } else { - None - };*/ - - let mut second = self.read(0) as usize; - let mut minute = self.read(2) as usize; - let mut hour = self.read(4) as usize; - let mut day = self.read(7) as usize; - let mut month = self.read(8) as usize; - let mut year = self.read(9) as usize; - let mut century = /* TODO: Fix invalid value from VirtualBox - if let Some(century_reg) = century_register { - self.read(century_reg) as usize - } else */ { - 20 - }; - let register_b = self.read(0xB); - - if register_b & 4 != 4 { - second = cvt_bcd(second); - minute = cvt_bcd(minute); - hour = cvt_bcd(hour & 0x7F) | (hour & 0x80); - day = cvt_bcd(day); - month = cvt_bcd(month); - year = cvt_bcd(year); - century = /* TODO: Fix invalid value from VirtualBox - if century_register.is_some() { - cvt_bcd(century) - } else */ { - century - }; - } - - if register_b & 2 != 2 || hour & 0x80 == 0x80 { - hour = ((hour & 0x7F) + 12) % 24; - } - - year += century * 100; - - // Unix time from clock - let mut secs: u64 = (year as u64 - 1970) * 31_536_000; - - let mut leap_days = (year as u64 - 1972) / 4 + 1; - if year % 4 == 0 && month <= 2 { - leap_days -= 1; - } - secs += leap_days * 86_400; - - match month { - 2 => secs += 2_678_400, - 3 => secs += 5_097_600, - 4 => secs += 7_776_000, - 5 => secs += 10_368_000, - 6 => secs += 13_046_400, - 7 => secs += 15_638_400, - 8 => secs += 18_316_800, - 9 => secs += 20_995_200, - 10 => secs += 23_587_200, - 11 => secs += 26_265_600, - 12 => secs += 28_857_600, - _ => (), - } - - secs += (day as u64 - 1) * 86_400; - secs += hour as u64 * 3600; - secs += minute as u64 * 60; - secs += second as u64; - - secs - } - - /// Get time - pub fn time(&mut self) -> u64 { - loop { - unsafe { - self.wait(false); - let time = self.time_no_wait(); - self.wait(false); - let next_time = self.time_no_wait(); - if time == next_time { - return time; - } - } - } - } -} diff --git a/src/arch/x86_64/device/serial.rs b/src/arch/x86_64/device/serial.rs deleted file mode 100644 index c2b424c0..00000000 --- a/src/arch/x86_64/device/serial.rs +++ /dev/null @@ -1,11 +0,0 @@ -use crate::devices::uart_16550::SerialPort; -use crate::syscall::io::Pio; -use spin::Mutex; - -pub static COM1: Mutex>> = Mutex::new(SerialPort::>::new(0x3F8)); -pub static COM2: Mutex>> = Mutex::new(SerialPort::>::new(0x2F8)); - -pub unsafe fn init() { - COM1.lock().init(); - COM2.lock().init(); -} diff --git a/src/arch/x86_64/gdt.rs b/src/arch/x86_64/gdt.rs index 652f8103..83c1e6de 100644 --- a/src/arch/x86_64/gdt.rs +++ b/src/arch/x86_64/gdt.rs @@ -1,24 +1,28 @@ //! 
Global descriptor table -use core::mem; -use x86::current::segmentation::set_cs; -use x86::current::task::TaskStateSegment; -use x86::shared::PrivilegeLevel; -use x86::shared::dtables::{self, DescriptorTablePointer}; -use x86::shared::segmentation::{self, SegmentDescriptor, SegmentSelector}; -use x86::shared::task; +use core::{convert::TryInto, mem::size_of}; -use crate::paging::PAGE_SIZE; +use crate::{ + cpu_set::LogicalCpuId, + paging::{RmmA, RmmArch, PAGE_SIZE}, + percpu::PercpuBlock, +}; + +use x86::{ + bits64::task::TaskStateSegment, + dtables::{self, DescriptorTablePointer}, + segmentation::{self, Descriptor as SegmentDescriptor, SegmentSelector}, + task, Ring, +}; pub const GDT_NULL: usize = 0; pub const GDT_KERNEL_CODE: usize = 1; pub const GDT_KERNEL_DATA: usize = 2; -pub const GDT_KERNEL_TLS: usize = 3; -pub const GDT_USER_CODE: usize = 4; -pub const GDT_USER_DATA: usize = 5; -pub const GDT_USER_TLS: usize = 6; -pub const GDT_TSS: usize = 7; -pub const GDT_TSS_HIGH: usize = 8; +pub const GDT_USER_CODE32_UNUSED: usize = 3; +pub const GDT_USER_DATA: usize = 4; +pub const GDT_USER_CODE: usize = 5; +pub const GDT_TSS: usize = 6; +pub const GDT_TSS_HIGH: usize = 7; pub const GDT_A_PRESENT: u8 = 1 << 7; pub const GDT_A_RING_0: u8 = 0 << 5; @@ -38,148 +42,237 @@ pub const GDT_F_PAGE_SIZE: u8 = 1 << 7; pub const GDT_F_PROTECTED_MODE: u8 = 1 << 6; pub const GDT_F_LONG_MODE: u8 = 1 << 5; -static mut INIT_GDTR: DescriptorTablePointer = DescriptorTablePointer { - limit: 0, - base: 0 as *const SegmentDescriptor -}; +const IOBITMAP_SIZE: u32 = 65536 / 8; -static mut INIT_GDT: [GdtEntry; 4] = [ +static INIT_GDT: [GdtEntry; 3] = [ // Null GdtEntry::new(0, 0, 0, 0), // Kernel code - GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_EXECUTABLE | GDT_A_PRIVILEGE, GDT_F_LONG_MODE), + GdtEntry::new( + 0, + 0, + GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_EXECUTABLE | GDT_A_PRIVILEGE, + GDT_F_LONG_MODE, + ), // Kernel data - GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, GDT_F_LONG_MODE), - // Kernel TLS - GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, GDT_F_LONG_MODE) + GdtEntry::new( + 0, + 0, + GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, + GDT_F_LONG_MODE, + ), ]; -#[thread_local] -pub static mut GDTR: DescriptorTablePointer = DescriptorTablePointer { - limit: 0, - base: 0 as *const SegmentDescriptor -}; - -#[thread_local] -pub static mut GDT: [GdtEntry; 9] = [ +// Later copied into the actual GDT with various fields set. +const BASE_GDT: [GdtEntry; 8] = [ // Null GdtEntry::new(0, 0, 0, 0), // Kernel code - GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_EXECUTABLE | GDT_A_PRIVILEGE, GDT_F_LONG_MODE), + GdtEntry::new( + 0, + 0, + GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_EXECUTABLE | GDT_A_PRIVILEGE, + GDT_F_LONG_MODE, + ), // Kernel data - GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, GDT_F_LONG_MODE), - // Kernel TLS - GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, GDT_F_LONG_MODE), - // User code - GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_EXECUTABLE | GDT_A_PRIVILEGE, GDT_F_LONG_MODE), + GdtEntry::new( + 0, + 0, + GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, + GDT_F_LONG_MODE, + ), + // Dummy 32-bit user code - apparently necessary for SYSRET. We restrict it to ring 0 anyway. 
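(Editorial aside, not part of the patch: the dummy 32-bit user code slot is needed because SYSRET loads CS from STAR[63:48] + 16 and SS from STAR[63:48] + 8, so the user data and 64-bit user code descriptors must directly follow it. A minimal sketch of how the STAR MSR could be programmed for this layout — the selector constants come from this file, but the function itself is hypothetical and not the patch's actual init code:)

    unsafe fn star_msr_sketch() {
        use x86::msr::{wrmsr, IA32_STAR};
        // SYSCALL loads CS from STAR[47:32] and SS from STAR[47:32] + 8,
        // i.e. kernel code followed by kernel data.
        let syscall_base = (GDT_KERNEL_CODE as u64) << 3;
        // SYSRET loads CS from STAR[63:48] + 16 and SS from STAR[63:48] + 8,
        // with RPL 3, which lands on GDT_USER_DATA and GDT_USER_CODE above.
        let sysret_base = ((GDT_USER_CODE32_UNUSED as u64) << 3) | 3;
        wrmsr(IA32_STAR, (sysret_base << 48) | (syscall_base << 32));
    }
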
+ GdtEntry::new( + 0, + 0, + GDT_A_PRESENT | GDT_A_RING_0 | GDT_A_SYSTEM | GDT_A_EXECUTABLE | GDT_A_PRIVILEGE, + GDT_F_PROTECTED_MODE, + ), // User data - GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, GDT_F_LONG_MODE), - // User TLS - GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, GDT_F_LONG_MODE), + GdtEntry::new( + 0, + 0, + GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, + GDT_F_LONG_MODE, + ), + // User (64-bit) code + GdtEntry::new( + 0, + 0, + GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_EXECUTABLE | GDT_A_PRIVILEGE, + GDT_F_LONG_MODE, + ), // TSS GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_TSS_AVAIL, 0), // TSS must be 16 bytes long, twice the normal size GdtEntry::new(0, 0, 0, 0), ]; -#[thread_local] -pub static mut TSS: TaskStateSegment = TaskStateSegment { - reserved: 0, - rsp: [0; 3], - reserved2: 0, - ist: [0; 7], - reserved3: 0, - reserved4: 0, - iomap_base: 0xFFFF +#[repr(C, align(16))] +struct Align([usize; 2]); + +#[repr(C, align(4096))] +pub struct ProcessorControlRegion { + // TODO: When both KASLR and KPTI are implemented, the PCR may need to be split into two pages, + // such that "secret" kernel addresses are only stored in the protected half. + pub self_ref: usize, + + pub user_rsp_tmp: usize, + // The GDT *must* be stored in the PCR! The paranoid interrupt handler, lacking a reliable way + // to correctly obtain GSBASE, uses SGDT to calculate the PCR offset. + pub gdt: [GdtEntry; 8], + pub percpu: PercpuBlock, + _rsvd: Align, + pub tss: TaskStateSegment, + + // These two fields are read by the CPU, but not currently modified by the kernel. Instead, the + // kernel sets the `iomap_base` field in the TSS, to either point to this bitmap, or outside + // the TSS, in which case userspace is not granted port IO access. + pub _iobitmap: [u8; IOBITMAP_SIZE as usize], + pub _all_ones: u8, +} + +const _: () = { + if core::mem::offset_of!(ProcessorControlRegion, tss) % 16 != 0 { + panic!("PCR is incorrectly defined, TSS alignment is too small"); + } + if core::mem::offset_of!(ProcessorControlRegion, gdt) % 8 != 0 { + panic!("PCR is incorrectly defined, GDT alignment is too small"); + } }; -pub unsafe fn set_tcb(pid: usize) { - GDT[GDT_USER_TLS].set_offset((crate::USER_TCB_OFFSET + pid * PAGE_SIZE) as u32); +pub unsafe fn pcr() -> *mut ProcessorControlRegion { + // Primitive benchmarking of RDFSBASE and RDGSBASE in userspace, appears to indicate that + // obtaining FSBASE/GSBASE using mov gs:[gs_self_ref] is faster than using the (probably + // microcoded) instructions. 
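(Editorial aside: the slower alternative that the comment above refers to would read GSBASE directly instead of dereferencing the self pointer. A sketch, assuming CR4.FSGSBASE is enabled and the CPU advertises the `fsgsbase` feature:)

    let pcr_via_rdgsbase: *mut ProcessorControlRegion;
    core::arch::asm!("rdgsbase {}", out(reg) pcr_via_rdgsbase);
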
+ let mut ret: *mut ProcessorControlRegion; + core::arch::asm!("mov {}, gs:[{}]", out(reg) ret, const(core::mem::offset_of!(ProcessorControlRegion, self_ref))); + ret } #[cfg(feature = "pti")] -pub unsafe fn set_tss_stack(stack: usize) { - use arch::x86_64::pti::{PTI_CPU_STACK, PTI_CONTEXT_STACK}; - TSS.rsp[0] = (PTI_CPU_STACK.as_ptr() as usize + PTI_CPU_STACK.len()) as u64; +pub unsafe fn set_tss_stack(pcr: *mut ProcessorControlRegion, stack: usize) { + use super::pti::{PTI_CONTEXT_STACK, PTI_CPU_STACK}; + core::ptr::addr_of_mut!((*pcr).tss.rsp[0]) + .write_unaligned((PTI_CPU_STACK.as_ptr() as usize + PTI_CPU_STACK.len()) as u64); PTI_CONTEXT_STACK = stack; } #[cfg(not(feature = "pti"))] -pub unsafe fn set_tss_stack(stack: usize) { - TSS.rsp[0] = stack as u64; +pub unsafe fn set_tss_stack(pcr: *mut ProcessorControlRegion, stack: usize) { + // TODO: If this increases performance, read gs:[offset] directly + core::ptr::addr_of_mut!((*pcr).tss.rsp[0]).write_unaligned(stack as u64); } -// Initialize GDT +pub unsafe fn set_userspace_io_allowed(pcr: *mut ProcessorControlRegion, allowed: bool) { + let offset = if allowed { + u16::try_from(size_of::()).unwrap() + } else { + 0xFFFF + }; + core::ptr::addr_of_mut!((*pcr).tss.iomap_base).write(offset); +} + +// Initialize startup GDT +#[cold] pub unsafe fn init() { - // Setup the initial GDT with TLS, so we can setup the TLS GDT (a little confusing) - // This means that each CPU will have its own GDT, but we only need to define it once as a thread local - INIT_GDTR.limit = (INIT_GDT.len() * mem::size_of::() - 1) as u16; - INIT_GDTR.base = INIT_GDT.as_ptr() as *const SegmentDescriptor; - - // Load the initial GDT, before we have access to thread locals - dtables::lgdt(&INIT_GDTR); - - // Load the segment descriptors - set_cs(SegmentSelector::new(GDT_KERNEL_CODE as u16, PrivilegeLevel::Ring0)); - segmentation::load_ds(SegmentSelector::new(GDT_KERNEL_DATA as u16, PrivilegeLevel::Ring0)); - segmentation::load_es(SegmentSelector::new(GDT_KERNEL_DATA as u16, PrivilegeLevel::Ring0)); - segmentation::load_fs(SegmentSelector::new(GDT_KERNEL_DATA as u16, PrivilegeLevel::Ring0)); - segmentation::load_gs(SegmentSelector::new(GDT_KERNEL_DATA as u16, PrivilegeLevel::Ring0)); - segmentation::load_ss(SegmentSelector::new(GDT_KERNEL_DATA as u16, PrivilegeLevel::Ring0)); + // Before the kernel can remap itself, it needs to switch to a GDT it controls. Start with a + // minimal kernel-only GDT. + dtables::lgdt(&DescriptorTablePointer { + limit: (INIT_GDT.len() * size_of::() - 1) as u16, + base: INIT_GDT.as_ptr() as *const SegmentDescriptor, + }); + + load_segments(); +} +#[cold] +unsafe fn load_segments() { + segmentation::load_cs(SegmentSelector::new(GDT_KERNEL_CODE as u16, Ring::Ring0)); + segmentation::load_ss(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); + + segmentation::load_ds(SegmentSelector::from_raw(0)); + segmentation::load_es(SegmentSelector::from_raw(0)); + segmentation::load_fs(SegmentSelector::from_raw(0)); + + // What happens when GS is loaded with a NULL selector, is undefined on Intel CPUs. However, + // GSBASE is set later, and percpu is not used until gdt::init_paging(). + segmentation::load_gs(SegmentSelector::from_raw(0)); } -/// Initialize GDT with TLS -pub unsafe fn init_paging(tcb_offset: usize, stack_offset: usize) { - // Set the TLS segment to the offset of the Thread Control Block - INIT_GDT[GDT_KERNEL_TLS].set_offset(tcb_offset as u32); +/// Initialize GDT and PCR. 
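(Editorial aside: a worked example of the allocation-order arithmetic used by init_paging below, assuming 4 KiB pages; the 12 KiB size is hypothetical, not the real size of the PCR:)

    let size: usize = 12 * 1024; // hypothetical size_of::<ProcessorControlRegion>()
    let pages = size.div_ceil(4096); // 3 pages
    let order = pages.next_power_of_two().trailing_zeros(); // 3 -> 4 -> order 2
    assert_eq!(order, 2); // allocate_p2frame(2) returns a 2^2-page (16 KiB) block
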
+#[cold] +pub unsafe fn init_paging(stack_offset: usize, cpu_id: LogicalCpuId) { + let alloc_order = size_of::() + .div_ceil(PAGE_SIZE) + .next_power_of_two() + .trailing_zeros(); + let pcr_frame = crate::memory::allocate_p2frame(alloc_order).expect("failed to allocate PCR"); + let pcr = &mut *(RmmA::phys_to_virt(pcr_frame.base()).data() as *mut ProcessorControlRegion); - // Load the initial GDT, before we have access to thread locals - dtables::lgdt(&INIT_GDTR); + pcr.self_ref = pcr as *mut ProcessorControlRegion as usize; - // Load the segment descriptors - segmentation::load_fs(SegmentSelector::new(GDT_KERNEL_TLS as u16, PrivilegeLevel::Ring0)); + // Setup the GDT. + pcr.gdt = BASE_GDT; - // Now that we have access to thread locals, setup the AP's individual GDT - GDTR.limit = (GDT.len() * mem::size_of::() - 1) as u16; - GDTR.base = GDT.as_ptr() as *const SegmentDescriptor; + let limit = (pcr.gdt.len() * size_of::() - 1) + .try_into() + .expect("main GDT way too large"); + let base = pcr.gdt.as_ptr() as *const SegmentDescriptor; - // Set the TLS segment to the offset of the Thread Control Block - GDT[GDT_KERNEL_TLS].set_offset(tcb_offset as u32); + let gdtr: DescriptorTablePointer = DescriptorTablePointer { limit, base }; - // Set the User TLS segment to the offset of the user TCB - set_tcb(0); + { + pcr.tss.iomap_base = 0xFFFF; + pcr._all_ones = 0xFF; - // We can now access our TSS, which is a thread local - GDT[GDT_TSS].set_offset(&TSS as *const _ as u32); - GDT[GDT_TSS].set_limit(mem::size_of::() as u32); + let tss = &mut pcr.tss as *mut TaskStateSegment as usize as u64; + let tss_lo = (tss & 0xFFFF_FFFF) as u32; + let tss_hi = (tss >> 32) as u32; - // Set the stack pointer when coming back from userspace - set_tss_stack(stack_offset); + pcr.gdt[GDT_TSS].set_offset(tss_lo); + pcr.gdt[GDT_TSS].set_limit(size_of::() as u32 + IOBITMAP_SIZE); - // Load the new GDT, which is correctly located in thread local storage - dtables::lgdt(&GDTR); + (&mut pcr.gdt[GDT_TSS_HIGH] as *mut GdtEntry) + .cast::() + .write(tss_hi); + } + + // Load the new GDT, which is correctly located in thread local storage. + dtables::lgdt(&gdtr); + + // Load segments again, possibly resetting FSBASE and GSBASE. + load_segments(); + + // Ensure that GSBASE always points to the PCR in kernel space. + x86::msr::wrmsr(x86::msr::IA32_GS_BASE, pcr as *mut _ as usize as u64); + + // While GSBASE points to the PCR in kernel space, userspace is free to set it to other values. + // Zero-initialize userspace's GSBASE. The reason the GSBASE register writes are reversed, is + // because entering usermode will entail executing the SWAPGS instruction. + x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, 0); + + // Set the userspace FSBASE to zero. + x86::msr::wrmsr(x86::msr::IA32_FS_BASE, 0); - // Reload the segment descriptors - set_cs(SegmentSelector::new(GDT_KERNEL_CODE as u16, PrivilegeLevel::Ring0)); - segmentation::load_ds(SegmentSelector::new(GDT_KERNEL_DATA as u16, PrivilegeLevel::Ring0)); - segmentation::load_es(SegmentSelector::new(GDT_KERNEL_DATA as u16, PrivilegeLevel::Ring0)); - segmentation::load_fs(SegmentSelector::new(GDT_KERNEL_TLS as u16, PrivilegeLevel::Ring0)); - segmentation::load_gs(SegmentSelector::new(GDT_KERNEL_DATA as u16, PrivilegeLevel::Ring0)); - segmentation::load_ss(SegmentSelector::new(GDT_KERNEL_DATA as u16, PrivilegeLevel::Ring0)); + // Set the stack pointer to use when coming back from userspace. 
+ set_tss_stack(pcr, stack_offset); // Load the task register - task::load_tr(SegmentSelector::new(GDT_TSS as u16, PrivilegeLevel::Ring0)); -} + task::load_tr(SegmentSelector::new(GDT_TSS as u16, Ring::Ring0)); + + pcr.percpu = PercpuBlock::init(cpu_id); + crate::percpu::init_tlb_shootdown(cpu_id, &mut pcr.percpu); +} #[derive(Copy, Clone, Debug)] -#[repr(packed)] +#[repr(C, packed)] pub struct GdtEntry { pub limitl: u16, pub offsetl: u16, pub offsetm: u8, pub access: u8, pub flags_limith: u8, - pub offseth: u8 + pub offseth: u8, } impl GdtEntry { @@ -188,9 +281,9 @@ impl GdtEntry { limitl: limit as u16, offsetl: offset as u16, offsetm: (offset >> 16) as u8, - access: access, + access, flags_limith: flags & 0xF0 | ((limit >> 16) as u8) & 0x0F, - offseth: (offset >> 24) as u8 + offseth: (offset >> 24) as u8, } } @@ -205,3 +298,9 @@ impl GdtEntry { self.flags_limith = self.flags_limith & 0xF0 | ((limit >> 16) as u8) & 0x0F; } } + +impl PercpuBlock { + pub fn current() -> &'static Self { + unsafe { &*core::ptr::addr_of!((*pcr()).percpu) } + } +} diff --git a/src/arch/x86_64/graphical_debug/debug.rs b/src/arch/x86_64/graphical_debug/debug.rs deleted file mode 100644 index c0cbc3c5..00000000 --- a/src/arch/x86_64/graphical_debug/debug.rs +++ /dev/null @@ -1,83 +0,0 @@ -use core::fmt; - -use super::Display; - -pub struct DebugDisplay { - display: Display, - x: usize, - y: usize, - w: usize, - h: usize, -} - -impl DebugDisplay { - pub fn new(display: Display) -> DebugDisplay { - let w = display.width/8; - let h = display.height/16; - DebugDisplay { - display, - x: 0, - y: 0, - w: w, - h: h, - } - } - - pub fn into_display(self) -> Display { - self.display - } - - pub fn write_char(&mut self, c: char) { - if self.x >= self.w || c == '\n' { - self.x = 0; - self.y += 1; - } - - if self.y >= self.h { - let new_y = self.h - 1; - let d_y = self.y - new_y; - - self.display.scroll(d_y * 16); - - self.display.rect( - 0, (self.h - d_y) * 16, - self.w * 8, d_y * 16, - 0x000000 - ); - - self.display.sync( - 0, 0, - self.w * 8, self.h * 16 - ); - - self.y = new_y; - } - - if c != '\n' { - self.display.rect( - self.x * 8, self.y * 16, - 8, 16, - 0x000000 - ); - - self.display.char( - self.x * 8, self.y * 16, - c, - 0xFFFFFF - ); - - self.display.sync( - self.x * 8, self.y * 16, - 8, 16 - ); - - self.x += 1; - } - } - - pub fn write(&mut self, buf: &[u8]) { - for &b in buf { - self.write_char(b as char); - } - } -} diff --git a/src/arch/x86_64/graphical_debug/display.rs b/src/arch/x86_64/graphical_debug/display.rs deleted file mode 100644 index 682c56e1..00000000 --- a/src/arch/x86_64/graphical_debug/display.rs +++ /dev/null @@ -1,149 +0,0 @@ -use core::alloc::{GlobalAlloc, Layout}; -use core::{cmp, slice}; - -use super::FONT; -use super::primitive::{fast_set32, fast_set64, fast_copy}; - -/// A display -pub struct Display { - pub width: usize, - pub height: usize, - pub onscreen: &'static mut [u32], - pub offscreen: &'static mut [u32], -} - -impl Display { - pub fn new(width: usize, height: usize, onscreen: usize) -> Display { - let size = width * height; - let offscreen = unsafe { ::ALLOCATOR.alloc(Layout::from_size_align_unchecked(size * 4, 4096)) }; - unsafe { fast_set64(offscreen as *mut u64, 0, size/2) }; - Display { - width: width, - height: height, - onscreen: unsafe { slice::from_raw_parts_mut(onscreen as *mut u32, size) }, - offscreen: unsafe { slice::from_raw_parts_mut(offscreen as *mut u32, size) } - } - } - - /// Draw a rectangle - pub fn rect(&mut self, x: usize, y: usize, w: usize, h: usize, 
color: u32) { - let start_y = cmp::min(self.height, y); - let end_y = cmp::min(self.height, y + h); - - let start_x = cmp::min(self.width, x); - let len = cmp::min(self.width, x + w) - start_x; - - let mut offscreen_ptr = self.offscreen.as_mut_ptr() as usize; - - let stride = self.width * 4; - - let offset = y * stride + start_x * 4; - offscreen_ptr += offset; - - let mut rows = end_y - start_y; - while rows > 0 { - unsafe { - fast_set32(offscreen_ptr as *mut u32, color, len); - } - offscreen_ptr += stride; - rows -= 1; - } - } - - /// Invert a rectangle - pub fn invert(&mut self, x: usize, y: usize, w: usize, h: usize) { - let start_y = cmp::min(self.height, y); - let end_y = cmp::min(self.height, y + h); - - let start_x = cmp::min(self.width, x); - let len = cmp::min(self.width, x + w) - start_x; - - let mut offscreen_ptr = self.offscreen.as_mut_ptr() as usize; - - let stride = self.width * 4; - - let offset = y * stride + start_x * 4; - offscreen_ptr += offset; - - let mut rows = end_y - start_y; - while rows > 0 { - let mut row_ptr = offscreen_ptr; - let mut cols = len; - while cols > 0 { - unsafe { - let color = *(row_ptr as *mut u32); - *(row_ptr as *mut u32) = !color; - } - row_ptr += 4; - cols -= 1; - } - offscreen_ptr += stride; - rows -= 1; - } - } - - /// Draw a character - pub fn char(&mut self, x: usize, y: usize, character: char, color: u32) { - if x + 8 <= self.width && y + 16 <= self.height { - let mut dst = self.offscreen.as_mut_ptr() as usize + (y * self.width + x) * 4; - - let font_i = 16 * (character as usize); - if font_i + 16 <= FONT.len() { - for row in 0..16 { - let row_data = FONT[font_i + row]; - for col in 0..8 { - if (row_data >> (7 - col)) & 1 == 1 { - unsafe { *((dst + col * 4) as *mut u32) = color; } - } - } - dst += self.width * 4; - } - } - } - } - - // Scroll the screen - pub fn scroll(&mut self, lines: usize) { - let offset = cmp::min(self.height, lines) * self.width; - let size = self.offscreen.len() - offset; - unsafe { - let to = self.offscreen.as_mut_ptr(); - let from = to.offset(offset as isize); - fast_copy(to as *mut u8, from as *const u8, size * 4); - } - } - - /// Copy from offscreen to onscreen - pub fn sync(&mut self, x: usize, y: usize, w: usize, h: usize) { - let start_y = cmp::min(self.height, y); - let end_y = cmp::min(self.height, y + h); - - let start_x = cmp::min(self.width, x); - let len = (cmp::min(self.width, x + w) - start_x) * 4; - - let mut offscreen_ptr = self.offscreen.as_mut_ptr() as usize; - let mut onscreen_ptr = self.onscreen.as_mut_ptr() as usize; - - let stride = self.width * 4; - - let offset = y * stride + start_x * 4; - offscreen_ptr += offset; - onscreen_ptr += offset; - - let mut rows = end_y - start_y; - while rows > 0 { - unsafe { - fast_copy(onscreen_ptr as *mut u8, offscreen_ptr as *const u8, len); - } - offscreen_ptr += stride; - onscreen_ptr += stride; - rows -= 1; - } - } -} - -impl Drop for Display { - fn drop(&mut self) { - unsafe { ::ALLOCATOR.dealloc(self.offscreen.as_mut_ptr() as *mut u8, Layout::from_size_align_unchecked(self.offscreen.len() * 4, 4096)) }; - } -} diff --git a/src/arch/x86_64/graphical_debug/mod.rs b/src/arch/x86_64/graphical_debug/mod.rs deleted file mode 100644 index 081f5678..00000000 --- a/src/arch/x86_64/graphical_debug/mod.rs +++ /dev/null @@ -1,97 +0,0 @@ -use spin::Mutex; - -use memory::Frame; -use paging::{ActivePageTable, Page, PhysicalAddress, VirtualAddress}; -use paging::entry::EntryFlags; -use paging::mapper::MapperFlushAll; - -pub use self::debug::DebugDisplay; -use 
self::display::Display; -use self::mode_info::VBEModeInfo; -use self::primitive::fast_set64; - -pub mod debug; -pub mod display; -pub mod mode_info; -pub mod primitive; - -pub static FONT: &'static [u8] = include_bytes!("../../../../res/unifont.font"); - -pub static DEBUG_DISPLAY: Mutex> = Mutex::new(None); - -pub fn init(active_table: &mut ActivePageTable) { - println!("Starting graphical debug"); - - let width; - let height; - let physbaseptr; - - { - let mode_info_addr = 0x5200; - - { - let page = Page::containing_address(VirtualAddress::new(mode_info_addr)); - let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().get())); - let result = active_table.map_to(page, frame, EntryFlags::PRESENT | EntryFlags::NO_EXECUTE); - result.flush(active_table); - } - - { - let mode_info = unsafe { &*(mode_info_addr as *const VBEModeInfo) }; - - width = mode_info.xresolution as usize; - height = mode_info.yresolution as usize; - physbaseptr = mode_info.physbaseptr as usize; - } - - { - let page = Page::containing_address(VirtualAddress::new(mode_info_addr)); - let (result, _frame) = active_table.unmap_return(page, false); - result.flush(active_table); - } - } - - { - let size = width * height; - - let onscreen = physbaseptr + ::KERNEL_OFFSET; - { - let mut flush_all = MapperFlushAll::new(); - let start_page = Page::containing_address(VirtualAddress::new(onscreen)); - let end_page = Page::containing_address(VirtualAddress::new(onscreen + size * 4)); - for page in Page::range_inclusive(start_page, end_page) { - let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().get() - ::KERNEL_OFFSET)); - let flags = EntryFlags::PRESENT | EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE | EntryFlags::HUGE_PAGE; - let result = active_table.map_to(page, frame, flags); - flush_all.consume(result); - } - flush_all.flush(active_table); - } - - unsafe { fast_set64(onscreen as *mut u64, 0, size/2) }; - - let display = Display::new(width, height, onscreen); - let debug_display = DebugDisplay::new(display); - *DEBUG_DISPLAY.lock() = Some(debug_display); - } -} - -pub fn fini(active_table: &mut ActivePageTable) { - if let Some(debug_display) = DEBUG_DISPLAY.lock().take() { - let display = debug_display.into_display(); - let onscreen = display.onscreen.as_mut_ptr() as usize; - let size = display.width * display.height; - { - let mut flush_all = MapperFlushAll::new(); - let start_page = Page::containing_address(VirtualAddress::new(onscreen)); - let end_page = Page::containing_address(VirtualAddress::new(onscreen + size * 4)); - for page in Page::range_inclusive(start_page, end_page) { - let (result, _frame) = active_table.unmap_return(page, false); - flush_all.consume(result); - } - flush_all.flush(active_table); - } - } - - println!("Finished graphical debug"); -} diff --git a/src/arch/x86_64/graphical_debug/mode_info.rs b/src/arch/x86_64/graphical_debug/mode_info.rs deleted file mode 100644 index 7d59af64..00000000 --- a/src/arch/x86_64/graphical_debug/mode_info.rs +++ /dev/null @@ -1,37 +0,0 @@ -/// The info of the VBE mode -#[derive(Copy, Clone, Default, Debug)] -#[repr(packed)] -pub struct VBEModeInfo { - attributes: u16, - win_a: u8, - win_b: u8, - granularity: u16, - winsize: u16, - segment_a: u16, - segment_b: u16, - winfuncptr: u32, - bytesperscanline: u16, - pub xresolution: u16, - pub yresolution: u16, - xcharsize: u8, - ycharsize: u8, - numberofplanes: u8, - bitsperpixel: u8, - numberofbanks: u8, - memorymodel: u8, - banksize: u8, - numberofimagepages: u8, - unused: u8, 
- redmasksize: u8, - redfieldposition: u8, - greenmasksize: u8, - greenfieldposition: u8, - bluemasksize: u8, - bluefieldposition: u8, - rsvdmasksize: u8, - rsvdfieldposition: u8, - directcolormodeinfo: u8, - pub physbaseptr: u32, - offscreenmemoryoffset: u32, - offscreenmemsize: u16, -} diff --git a/src/arch/x86_64/graphical_debug/primitive.rs b/src/arch/x86_64/graphical_debug/primitive.rs deleted file mode 100644 index 922e89c5..00000000 --- a/src/arch/x86_64/graphical_debug/primitive.rs +++ /dev/null @@ -1,35 +0,0 @@ -#[cfg(target_arch = "x86_64")] -#[inline(always)] -#[cold] -pub unsafe fn fast_copy(dst: *mut u8, src: *const u8, len: usize) { - asm!("cld - rep movsb" - : - : "{rdi}"(dst as usize), "{rsi}"(src as usize), "{rcx}"(len) - : "cc", "memory", "rdi", "rsi", "rcx" - : "intel", "volatile"); -} - -#[cfg(target_arch = "x86_64")] -#[inline(always)] -#[cold] -pub unsafe fn fast_set32(dst: *mut u32, src: u32, len: usize) { - asm!("cld - rep stosd" - : - : "{rdi}"(dst as usize), "{eax}"(src), "{rcx}"(len) - : "cc", "memory", "rdi", "rcx" - : "intel", "volatile"); -} - -#[cfg(target_arch = "x86_64")] -#[inline(always)] -#[cold] -pub unsafe fn fast_set64(dst: *mut u64, src: u64, len: usize) { - asm!("cld - rep stosq" - : - : "{rdi}"(dst as usize), "{rax}"(src), "{rcx}"(len) - : "cc", "memory", "rdi", "rcx" - : "intel", "volatile"); -} diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs deleted file mode 100644 index 1a1d6449..00000000 --- a/src/arch/x86_64/idt.rs +++ /dev/null @@ -1,140 +0,0 @@ -use core::mem; -use x86::current::irq::IdtEntry as X86IdtEntry; -use x86::shared::dtables::{self, DescriptorTablePointer}; - -use crate::interrupt::*; -use crate::ipi::IpiKind; - -pub static mut INIT_IDTR: DescriptorTablePointer = DescriptorTablePointer { - limit: 0, - base: 0 as *const X86IdtEntry -}; - -pub static mut IDTR: DescriptorTablePointer = DescriptorTablePointer { - limit: 0, - base: 0 as *const X86IdtEntry -}; - -pub static mut IDT: [IdtEntry; 256] = [IdtEntry::new(); 256]; - -pub unsafe fn init() { - dtables::lidt(&INIT_IDTR); -} - -pub unsafe fn init_paging() { - IDTR.limit = (IDT.len() * mem::size_of::() - 1) as u16; - IDTR.base = IDT.as_ptr() as *const X86IdtEntry; - - // Set up exceptions - IDT[0].set_func(exception::divide_by_zero); - IDT[1].set_func(exception::debug); - IDT[2].set_func(exception::non_maskable); - IDT[3].set_func(exception::breakpoint); - IDT[3].set_flags(IdtFlags::PRESENT | IdtFlags::RING_3 | IdtFlags::INTERRUPT); - IDT[4].set_func(exception::overflow); - IDT[5].set_func(exception::bound_range); - IDT[6].set_func(exception::invalid_opcode); - IDT[7].set_func(exception::device_not_available); - IDT[8].set_func(exception::double_fault); - // 9 no longer available - IDT[10].set_func(exception::invalid_tss); - IDT[11].set_func(exception::segment_not_present); - IDT[12].set_func(exception::stack_segment); - IDT[13].set_func(exception::protection); - IDT[14].set_func(exception::page); - // 15 reserved - IDT[16].set_func(exception::fpu); - IDT[17].set_func(exception::alignment_check); - IDT[18].set_func(exception::machine_check); - IDT[19].set_func(exception::simd); - IDT[20].set_func(exception::virtualization); - // 21 through 29 reserved - IDT[30].set_func(exception::security); - // 31 reserved - - // Set up IRQs - IDT[32].set_func(irq::pit); - IDT[33].set_func(irq::keyboard); - IDT[34].set_func(irq::cascade); - IDT[35].set_func(irq::com2); - IDT[36].set_func(irq::com1); - IDT[37].set_func(irq::lpt2); - IDT[38].set_func(irq::floppy); - 
IDT[39].set_func(irq::lpt1); - IDT[40].set_func(irq::rtc); - IDT[41].set_func(irq::pci1); - IDT[42].set_func(irq::pci2); - IDT[43].set_func(irq::pci3); - IDT[44].set_func(irq::mouse); - IDT[45].set_func(irq::fpu); - IDT[46].set_func(irq::ata1); - IDT[47].set_func(irq::ata2); - - // Set IPI handlers - IDT[IpiKind::Wakeup as usize].set_func(ipi::wakeup); - IDT[IpiKind::Switch as usize].set_func(ipi::switch); - IDT[IpiKind::Tlb as usize].set_func(ipi::tlb); - IDT[IpiKind::Pit as usize].set_func(ipi::pit); - - // Set syscall function - IDT[0x80].set_func(syscall::syscall); - IDT[0x80].set_flags(IdtFlags::PRESENT | IdtFlags::RING_3 | IdtFlags::INTERRUPT); - - dtables::lidt(&IDTR); -} - -bitflags! { - pub struct IdtFlags: u8 { - const PRESENT = 1 << 7; - const RING_0 = 0 << 5; - const RING_1 = 1 << 5; - const RING_2 = 2 << 5; - const RING_3 = 3 << 5; - const SS = 1 << 4; - const INTERRUPT = 0xE; - const TRAP = 0xF; - } -} - -#[derive(Copy, Clone, Debug)] -#[repr(packed)] -pub struct IdtEntry { - offsetl: u16, - selector: u16, - zero: u8, - attribute: u8, - offsetm: u16, - offseth: u32, - zero2: u32 -} - -impl IdtEntry { - pub const fn new() -> IdtEntry { - IdtEntry { - offsetl: 0, - selector: 0, - zero: 0, - attribute: 0, - offsetm: 0, - offseth: 0, - zero2: 0 - } - } - - pub fn set_flags(&mut self, flags: IdtFlags) { - self.attribute = flags.bits; - } - - pub fn set_offset(&mut self, selector: u16, base: usize) { - self.selector = selector; - self.offsetl = base as u16; - self.offsetm = (base >> 16) as u16; - self.offseth = (base >> 32) as u32; - } - - // A function to set the offset more easily - pub fn set_func(&mut self, func: unsafe extern fn()) { - self.set_flags(IdtFlags::PRESENT | IdtFlags::RING_0 | IdtFlags::INTERRUPT); - self.set_offset(8, func as usize); - } -} diff --git a/src/arch/x86_64/interrupt/exception.rs b/src/arch/x86_64/interrupt/exception.rs index c98e5794..3da86b8a 100644 --- a/src/arch/x86_64/interrupt/exception.rs +++ b/src/arch/x86_64/interrupt/exception.rs @@ -1,168 +1,275 @@ +use syscall::Exception; +use x86::irq::PageFaultError; + use crate::{ - interrupt::stack_trace, - ptrace, - syscall::flag::* + arch::x86_shared::interrupt, context::signal::excp_handler, interrupt_error, interrupt_stack, + memory::GenericPfFlags, paging::VirtualAddress, panic::stack_trace, ptrace, syscall::flag::*, }; -extern { - fn ksignal(signal: usize); -} - -interrupt_stack!(divide_by_zero, stack, { +interrupt_stack!(divide_by_zero, |stack| { println!("Divide by zero"); stack.dump(); stack_trace(); - ksignal(SIGFPE); + excp_handler(Exception { + kind: 0, + ..Default::default() + }); }); -interrupt_stack!(debug, stack, { +interrupt_stack!(debug, @paranoid, |stack| { let mut handled = false; - let guard = ptrace::set_process_regs(stack); - - // Disable singlestep before their is a breakpoint, since the - // breakpoint handler might end up setting it again but unless it - // does we want the default to be false. + // Disable singlestep before there is a breakpoint, since the breakpoint + // handler might end up setting it again but unless it does we want the + // default to be false. 
let had_singlestep = stack.iret.rflags & (1 << 8) == 1 << 8; stack.set_singlestep(false); - if ptrace::breakpoint_callback(syscall::PTRACE_SINGLESTEP).is_some() { + if ptrace::breakpoint_callback(PTRACE_STOP_SINGLESTEP, None).is_some() { handled = true; } else { // There was no breakpoint, restore original value stack.set_singlestep(had_singlestep); } - drop(guard); - if !handled { println!("Debug trap"); stack.dump(); - ksignal(SIGTRAP); + excp_handler(Exception { + kind: 1, + ..Default::default() + }); } }); -interrupt_stack!(non_maskable, stack, { - println!("Non-maskable interrupt"); - stack.dump(); +interrupt_stack!(non_maskable, @paranoid, |stack| { + #[cfg(feature = "profiling")] + crate::profiling::nmi_handler(stack); + + #[cfg(not(feature = "profiling"))] + { + // TODO: This will likely deadlock + println!("Non-maskable interrupt"); + stack.dump(); + } }); -interrupt_stack!(breakpoint, stack, { - println!("Breakpoint trap"); - stack.dump(); - ksignal(SIGTRAP); +interrupt_stack!(breakpoint, |stack| { + // The processor lets RIP point to the instruction *after* int3, so + // unhandled breakpoint interrupt don't go in an infinite loop. But we + // throw SIGTRAP anyway, so that's not a problem. + // + // We have the following code to prevent + // - RIP from going out of sync with instructions + // - The user having to do 2 syscalls to replace the instruction at RIP + // - Having more compatibility glue for GDB than necessary + // + // Let's just follow Linux convention and let RIP be RIP-1, point to the + // int3 instruction. After all, it's the sanest thing to do. + stack.iret.rip -= 1; + + if ptrace::breakpoint_callback(PTRACE_STOP_BREAKPOINT, None).is_none() { + println!("Breakpoint trap"); + stack.dump(); + excp_handler(Exception { + kind: 3, + ..Default::default() + }); + } }); -interrupt_stack!(overflow, stack, { +interrupt_stack!(overflow, |stack| { println!("Overflow trap"); stack.dump(); stack_trace(); - ksignal(SIGFPE); + excp_handler(Exception { + kind: 4, + ..Default::default() + }); }); -interrupt_stack!(bound_range, stack, { +interrupt_stack!(bound_range, |stack| { println!("Bound range exceeded fault"); stack.dump(); stack_trace(); - ksignal(SIGSEGV); + excp_handler(Exception { + kind: 5, + ..Default::default() + }); }); -interrupt_stack!(invalid_opcode, stack, { +interrupt_stack!(invalid_opcode, |stack| { println!("Invalid opcode fault"); stack.dump(); stack_trace(); - ksignal(SIGILL); + excp_handler(Exception { + kind: 6, + ..Default::default() + }); }); -interrupt_stack!(device_not_available, stack, { +interrupt_stack!(device_not_available, |stack| { println!("Device not available fault"); stack.dump(); stack_trace(); - ksignal(SIGILL); + excp_handler(Exception { + kind: 7, + ..Default::default() + }); }); -interrupt_error!(double_fault, stack, { +interrupt_error!(double_fault, |stack, _code| { println!("Double fault"); stack.dump(); stack_trace(); - ksignal(SIGSEGV); + loop { + interrupt::disable(); + interrupt::halt(); + } }); -interrupt_error!(invalid_tss, stack, { +interrupt_error!(invalid_tss, |stack, code| { println!("Invalid TSS fault"); stack.dump(); stack_trace(); - ksignal(SIGSEGV); + excp_handler(Exception { + kind: 10, + code, + ..Default::default() + }); }); -interrupt_error!(segment_not_present, stack, { +interrupt_error!(segment_not_present, |stack, code| { println!("Segment not present fault"); stack.dump(); stack_trace(); - ksignal(SIGSEGV); + excp_handler(Exception { + kind: 11, + code, + ..Default::default() + }); }); 
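(Editorial aside: a worked example of the error-code decoding performed by the `page` handler just below; the value 0b0111 is a hypothetical CPU-pushed error code:)

    let code: usize = 0b0111;
    let arch_flags = PageFaultError::from_bits_truncate(code as u32);
    // P | WR | US: a write, from user mode, to a page that is present --
    // a protection violation such as writing to a read-only or CoW mapping.
    assert!(arch_flags.contains(PageFaultError::P | PageFaultError::WR | PageFaultError::US));
    // The handler below maps this to
    // GenericPfFlags::PRESENT | INVOLVED_WRITE | USER_NOT_SUPERVISOR.
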
-interrupt_error!(stack_segment, stack, { +interrupt_error!(stack_segment, |stack, code| { println!("Stack segment fault"); stack.dump(); stack_trace(); - ksignal(SIGSEGV); + excp_handler(Exception { + kind: 12, + code, + ..Default::default() + }); }); -interrupt_error!(protection, stack, { - println!("Protection fault"); +interrupt_error!(protection, |stack, code| { + println!("Protection fault code={:#0x}", code); stack.dump(); stack_trace(); - ksignal(SIGSEGV); + excp_handler(Exception { + kind: 13, + code, + ..Default::default() + }); }); -interrupt_error!(page, stack, { - let cr2: usize; - asm!("mov rax, cr2" : "={rax}"(cr2) : : : "intel", "volatile"); - println!("Page fault: {:>016X}", cr2); - stack.dump(); - stack_trace(); - ksignal(SIGSEGV); +interrupt_error!(page, |stack, code| { + let cr2 = VirtualAddress::new(unsafe { x86::controlregs::cr2() }); + let arch_flags = PageFaultError::from_bits_truncate(code as u32); + let mut generic_flags = GenericPfFlags::empty(); + + generic_flags.set( + GenericPfFlags::PRESENT, + arch_flags.contains(PageFaultError::P), + ); + generic_flags.set( + GenericPfFlags::INVOLVED_WRITE, + arch_flags.contains(PageFaultError::WR), + ); + generic_flags.set( + GenericPfFlags::USER_NOT_SUPERVISOR, + arch_flags.contains(PageFaultError::US), + ); + generic_flags.set( + GenericPfFlags::INVL, + arch_flags.contains(PageFaultError::RSVD), + ); + generic_flags.set( + GenericPfFlags::INSTR_NOT_DATA, + arch_flags.contains(PageFaultError::ID), + ); + + if crate::memory::page_fault_handler(stack, generic_flags, cr2).is_err() { + println!("Page fault: {:>016X} {:#?}", cr2.data(), arch_flags); + stack.dump(); + stack_trace(); + excp_handler(Exception { + kind: 14, + code, + address: cr2.data(), + }); + } }); -interrupt_stack!(fpu, stack, { +interrupt_stack!(fpu_fault, |stack| { println!("FPU floating point fault"); stack.dump(); stack_trace(); - ksignal(SIGFPE); + excp_handler(Exception { + kind: 16, + ..Default::default() + }); }); -interrupt_error!(alignment_check, stack, { +interrupt_error!(alignment_check, |stack, code| { println!("Alignment check fault"); stack.dump(); stack_trace(); - ksignal(SIGBUS); + excp_handler(Exception { + kind: 17, + code, + ..Default::default() + }); }); -interrupt_stack!(machine_check, stack, { +interrupt_stack!(machine_check, @paranoid, |stack| { println!("Machine check fault"); stack.dump(); stack_trace(); - ksignal(SIGBUS); + loop { + interrupt::disable(); + interrupt::halt(); + } }); -interrupt_stack!(simd, stack, { +interrupt_stack!(simd, |stack| { println!("SIMD floating point fault"); stack.dump(); + let mut mxcsr = 0_usize; + core::arch::asm!("stmxcsr [{}]", in(reg) core::ptr::addr_of_mut!(mxcsr)); + println!("MXCSR {:#0x}", mxcsr); stack_trace(); - ksignal(SIGFPE); + excp_handler(Exception { + kind: 19, + ..Default::default() + }); }); -interrupt_stack!(virtualization, stack, { +interrupt_stack!(virtualization, |stack| { println!("Virtualization fault"); stack.dump(); stack_trace(); - ksignal(SIGBUS); + loop { + interrupt::disable(); + interrupt::halt(); + } }); -interrupt_error!(security, stack, { +interrupt_error!(security, |stack, _code| { println!("Security exception"); stack.dump(); stack_trace(); - ksignal(SIGBUS); + loop { + interrupt::disable(); + interrupt::halt(); + } }); diff --git a/src/arch/x86_64/interrupt/handler.rs b/src/arch/x86_64/interrupt/handler.rs new file mode 100644 index 00000000..547e0c38 --- /dev/null +++ b/src/arch/x86_64/interrupt/handler.rs @@ -0,0 +1,526 @@ +use crate::{memory::ArchIntCtx, 
syscall::IntRegisters}; + +use super::super::flags::*; + +#[derive(Default)] +#[repr(C)] +pub struct ScratchRegisters { + pub r11: usize, + pub r10: usize, + pub r9: usize, + pub r8: usize, + pub rsi: usize, + pub rdi: usize, + pub rdx: usize, + pub rcx: usize, + pub rax: usize, +} + +impl ScratchRegisters { + pub fn dump(&self) { + println!("RAX: {:016x}", { self.rax }); + println!("RCX: {:016x}", { self.rcx }); + println!("RDX: {:016x}", { self.rdx }); + println!("RDI: {:016x}", { self.rdi }); + println!("RSI: {:016x}", { self.rsi }); + println!("R8: {:016x}", { self.r8 }); + println!("R9: {:016x}", { self.r9 }); + println!("R10: {:016x}", { self.r10 }); + println!("R11: {:016x}", { self.r11 }); + } +} + +#[derive(Default)] +#[repr(C)] +pub struct PreservedRegisters { + pub r15: usize, + pub r14: usize, + pub r13: usize, + pub r12: usize, + pub rbp: usize, + pub rbx: usize, +} + +impl PreservedRegisters { + pub fn dump(&self) { + println!("RBX: {:016x}", { self.rbx }); + println!("RBP: {:016x}", { self.rbp }); + println!("R12: {:016x}", { self.r12 }); + println!("R13: {:016x}", { self.r13 }); + println!("R14: {:016x}", { self.r14 }); + println!("R15: {:016x}", { self.r15 }); + } +} + +#[derive(Default)] +#[repr(C)] +pub struct IretRegisters { + pub rip: usize, + pub cs: usize, + pub rflags: usize, + + // In x86 Protected Mode, i.e. 32-bit kernels, the following two registers are conditionally + // pushed if the privilege ring changes. In x86 Long Mode however, i.e. 64-bit kernels, they + // are unconditionally pushed, mostly due to stack alignment requirements. + pub rsp: usize, + pub ss: usize, +} + +impl IretRegisters { + pub fn dump(&self) { + println!("RFLAG: {:016x}", { self.rflags }); + println!("CS: {:016x}", { self.cs }); + println!("RIP: {:016x}", { self.rip }); + + println!("RSP: {:016x}", { self.rsp }); + println!("SS: {:016x}", { self.ss }); + + unsafe { + let fsbase = x86::msr::rdmsr(x86::msr::IA32_FS_BASE); + let gsbase = x86::msr::rdmsr(x86::msr::IA32_KERNEL_GSBASE); + let kgsbase = x86::msr::rdmsr(x86::msr::IA32_GS_BASE); + println!( + "FSBASE {:016x}\nGSBASE {:016x}\nKGSBASE {:016x}", + fsbase, gsbase, kgsbase + ); + } + } +} + +#[derive(Default)] +#[repr(C)] +pub struct InterruptStack { + pub preserved: PreservedRegisters, + pub scratch: ScratchRegisters, + pub iret: IretRegisters, +} + +impl InterruptStack { + pub fn init(&mut self) { + // Always enable interrupts! + self.iret.rflags = x86::bits64::rflags::RFlags::FLAGS_IF.bits() as usize; + self.iret.cs = (crate::gdt::GDT_USER_CODE << 3) | 3; + self.iret.ss = (crate::gdt::GDT_USER_DATA << 3) | 3; + } + pub fn set_stack_pointer(&mut self, rsp: usize) { + self.iret.rsp = rsp; + } + pub fn instr_pointer(&self) -> usize { + self.iret.rip + } + pub fn sig_archdep_reg(&self) -> usize { + self.iret.rflags + } + pub fn set_instr_pointer(&mut self, rip: usize) { + self.iret.rip = rip; + } + + pub fn dump(&self) { + self.iret.dump(); + self.scratch.dump(); + self.preserved.dump(); + } + /// Saves all registers to a struct used by the proc: + /// scheme to read/write registers. 
+ pub fn save(&self, all: &mut IntRegisters) { + all.r15 = self.preserved.r15; + all.r14 = self.preserved.r14; + all.r13 = self.preserved.r13; + all.r12 = self.preserved.r12; + all.rbp = self.preserved.rbp; + all.rbx = self.preserved.rbx; + all.r11 = self.scratch.r11; + all.r10 = self.scratch.r10; + all.r9 = self.scratch.r9; + all.r8 = self.scratch.r8; + all.rsi = self.scratch.rsi; + all.rdi = self.scratch.rdi; + all.rdx = self.scratch.rdx; + all.rcx = self.scratch.rcx; + all.rax = self.scratch.rax; + all.rip = self.iret.rip; + all.cs = self.iret.cs; + all.rflags = self.iret.rflags; + all.rsp = self.iret.rsp; + all.ss = self.iret.ss; + } + /// Loads all registers from a struct used by the proc: + /// scheme to read/write registers. + pub fn load(&mut self, all: &IntRegisters) { + self.preserved.r15 = all.r15; + self.preserved.r14 = all.r14; + self.preserved.r13 = all.r13; + self.preserved.r12 = all.r12; + self.preserved.rbp = all.rbp; + self.preserved.rbx = all.rbx; + self.scratch.r11 = all.r11; + self.scratch.r10 = all.r10; + self.scratch.r9 = all.r9; + self.scratch.r8 = all.r8; + self.scratch.rsi = all.rsi; + self.scratch.rdi = all.rdi; + self.scratch.rdx = all.rdx; + self.scratch.rcx = all.rcx; + self.scratch.rax = all.rax; + self.iret.rip = all.rip; + self.iret.rsp = all.rsp; + + // CS and SS are immutable, at least their privilege levels. + + // OF, DF, 0, TF => D + // SF, ZF, 0, AF => D + // 0, PF, 1, CF => 5 + const ALLOWED_RFLAGS: usize = 0xDD5; + + self.iret.rflags &= !ALLOWED_RFLAGS; + self.iret.rflags |= all.rflags & ALLOWED_RFLAGS; + } + /// Enables the "Trap Flag" in the FLAGS register, causing the CPU + /// to send a Debug exception after the next instruction. This is + /// used for singlestep in the proc: scheme. + pub fn set_singlestep(&mut self, enabled: bool) { + if enabled { + self.iret.rflags |= FLAG_SINGLESTEP; + } else { + self.iret.rflags &= !FLAG_SINGLESTEP; + } + } +} + +#[macro_export] +macro_rules! push_scratch { + () => { + " + // Push scratch registers + push rcx + push rdx + push rdi + push rsi + push r8 + push r9 + push r10 + push r11 + " + }; +} +#[macro_export] +macro_rules! pop_scratch { + () => { + " + // Pop scratch registers + pop r11 + pop r10 + pop r9 + pop r8 + pop rsi + pop rdi + pop rdx + pop rcx + pop rax + " + }; +} + +#[macro_export] +macro_rules! push_preserved { + () => { + " + // Push preserved registers + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + " + }; +} +#[macro_export] +macro_rules! pop_preserved { + () => { + " + // Pop preserved registers + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + " + }; +} +macro_rules! swapgs_iff_ring3_fast { + // TODO: Spectre V1: LFENCE? + () => { + " + // Check whether the last two bits RSP+8 (code segment) are equal to zero. + test QWORD PTR [rsp + 8], 0x3 + // Skip the SWAPGS instruction if CS & 0b11 == 0b00. + jz 2f + swapgs + 2: + " + }; +} +macro_rules! swapgs_iff_ring3_fast_errorcode { + // TODO: Spectre V1: LFENCE? + () => { + " + test QWORD PTR [rsp + 16], 0x3 + jz 2f + swapgs + 2: + " + }; +} + +macro_rules! conditional_swapgs_paranoid { + // For regular interrupt handlers and the syscall handler, managing IA32_GS_BASE and + // IA32_KERNEL_GS_BASE (the "GSBASE registers") is more or less trivial when using the SWAPGS + // instruction. 
+ // + // The syscall handler simply runs SWAPGS, as syscalls can only originate from usermode, + // whereas interrupt handlers conditionally SWAPGS unless the interrupt was triggered from + // kernel mode, in which case the "swap state" is already valid, and there is no need to + // SWAPGS. + // + // Handling GSBASE correctly for paranoid interrupts however, is not as simple. NMIs can occur + // between the check of whether an interrupt came from usermode, and the actual SWAPGS + // instruction. #DB can also be triggered inside of a kernel interrupt handler, due to + // breakpoints, even though setting up such breakpoints in the first place, is not yet + // supported by the kernel. + // + // Luckily, the GDT always resides in the PCR (at least after init_paging, but there are no + // interrupt handlers set up before that), allowing GSBASE to be calculated relatively cheaply. + // Out of the two GSBASE registers, at least one must be *the* kernel GSBASE, allowing for a + // simple conditional SWAPGS. + // + // (An alternative to conditionally executing SWAPGS, would be to save and restore GSBASE via + // e.g. the stack. That would nonetheless require saving and restoring both GSBASE registers, + // if the interrupt handler should be allowed to context switch, which the current #DB handler + // may do.) + // + // TODO: Handle nested NMIs like Linux does (https://lwn.net/Articles/484932/)?. + + () => { concat!( + // Put the GDT base pointer in RDI. + " + sub rsp, 16 + sgdt [rsp + 6] + mov rdi, [rsp + 8] + add rsp, 16 + ", + // Calculate the PCR address by subtracting the offset of the GDT in the PCR struct. + "sub rdi, {PCR_GDT_OFFSET};", + + // Read the current IA32_GS_BASE value into RDX. + alternative!( + feature: "fsgsbase", + then: ["rdgsbase rdx"], + default: [" + mov ecx, {IA32_GS_BASE} + rdmsr + shl rdx, 32 + or rdx, rax + "] + ), + + // If they were not equal, the PCR address must instead be in IA32_KERNEL_GS_BASE, + // requiring a SWAPGS. GSBASE needs to be swapped back, so store the same flag in RBX. + + // TODO: Spectre V1: LFENCE? + " + cmp rdx, rdi + sete bl + je 2f + swapgs + 2: + ", + ) } +} +macro_rules! conditional_swapgs_back_paranoid { + () => { + " + test bl, bl + jnz 2f + swapgs + 2: + " + }; +} +macro_rules! nop { + () => { + " + // Unused: {IA32_GS_BASE} {PCR_GDT_OFFSET} + " + }; +} + +#[macro_export] +macro_rules! interrupt_stack { + // XXX: Apparently we cannot use $expr and check for bool exhaustiveness, so we will have to + // use idents directly instead. + ($name:ident, $save1:ident!, $save2:ident!, $rstor2:ident!, $rstor1:ident!, is_paranoid: $is_paranoid:expr, |$stack:ident| $code:block) => { + #[naked] + pub unsafe extern "C" fn $name() { + unsafe extern "C" fn inner($stack: &mut $crate::arch::x86_64::interrupt::InterruptStack) { + #[allow(unused_unsafe)] + unsafe { + $code + } + } + core::arch::naked_asm!(concat!( + // Clear direction flag, required by ABI when running any Rust code in the kernel. 
+ "cld;", + + // Backup all userspace registers to stack + $save1!(), + "push rax\n", + push_scratch!(), + push_preserved!(), + + $save2!(), + + // TODO: Map PTI + // $crate::arch::x86_64::pti::map(); + + // Call inner function with pointer to stack + " + mov rdi, rsp + call {inner} + ", + + // TODO: Unmap PTI + // $crate::arch::x86_64::pti::unmap(); + + $rstor2!(), + + // Restore all userspace registers + pop_preserved!(), + pop_scratch!(), + + $rstor1!(), + "iretq\n", + ), + + inner = sym inner, + IA32_GS_BASE = const(x86::msr::IA32_GS_BASE), + + PCR_GDT_OFFSET = const(core::mem::offset_of!(crate::gdt::ProcessorControlRegion, gdt)), + ); + } + }; + ($name:ident, |$stack:ident| $code:block) => { interrupt_stack!($name, swapgs_iff_ring3_fast!, nop!, nop!, swapgs_iff_ring3_fast!, is_paranoid: false, |$stack| $code); }; + ($name:ident, @paranoid, |$stack:ident| $code:block) => { interrupt_stack!($name, nop!, conditional_swapgs_paranoid!, conditional_swapgs_back_paranoid!, nop!, is_paranoid: true, |$stack| $code); } +} + +#[macro_export] +macro_rules! interrupt { + ($name:ident, || $code:block) => { + #[naked] + pub unsafe extern "C" fn $name() { + unsafe extern "C" fn inner() { + $code + } + + core::arch::naked_asm!(concat!( + // Clear direction flag, required by ABI when running any Rust code in the kernel. + "cld;", + + // Backup all userspace registers to stack + swapgs_iff_ring3_fast!(), + "push rax\n", + push_scratch!(), + + // TODO: Map PTI + // $crate::arch::x86_64::pti::map(); + + // Call inner function with pointer to stack + "call {inner}\n", + + // TODO: Unmap PTI + // $crate::arch::x86_64::pti::unmap(); + + // Restore all userspace registers + pop_scratch!(), + + swapgs_iff_ring3_fast!(), + "iretq\n", + ), + + inner = sym inner, + ); + } + }; +} + +#[macro_export] +macro_rules! interrupt_error { + ($name:ident, |$stack:ident, $error_code:ident| $code:block) => { + #[naked] + pub unsafe extern "C" fn $name() { + unsafe extern "C" fn inner($stack: &mut $crate::arch::x86_64::interrupt::handler::InterruptStack, $error_code: usize) { + #[allow(unused_unsafe)] + unsafe { + $code + } + } + + core::arch::naked_asm!(concat!( + // Clear direction flag, required by ABI when running any Rust code in the kernel. + "cld;", + + swapgs_iff_ring3_fast_errorcode!(), + + // Don't push RAX yet, as the error code is already stored in RAX's position. + + // Push all userspace registers + push_scratch!(), + push_preserved!(), + + // Now that we have a couple of usable registers, put the error code in the second + // argument register for the inner function, and save RAX where it would normally + // be. + "mov rsi, [rsp + {rax_offset}];", + "mov [rsp + {rax_offset}], rax;", + + // TODO: Map PTI + // $crate::arch::x86_64::pti::map(); + + // Call inner function with pointer to stack, and error code. + "mov rdi, rsp;", + "call {inner};", + + // TODO: Unmap PTI + // $crate::arch::x86_64::pti::unmap(); + + // Restore all userspace registers + pop_preserved!(), + pop_scratch!(), + + // The error code has already been popped, so use the regular macro. + swapgs_iff_ring3_fast!(), + "iretq;", + ), + + inner = sym inner, + rax_offset = const(::core::mem::size_of::<$crate::interrupt::handler::PreservedRegisters>() + ::core::mem::size_of::<$crate::interrupt::handler::ScratchRegisters>() - 8), + ); + } + }; +} + +impl ArchIntCtx for InterruptStack { + fn ip(&self) -> usize { + self.iret.rip + } + fn recover_and_efault(&mut self) { + // We were inside a usercopy function that failed. 
This is handled by setting rax to a + // nonzero value, and emulating the ret instruction. + self.scratch.rax = 1; + let ret_addr = unsafe { (self.iret.rsp as *const usize).read() }; + self.iret.rsp += 8; + self.iret.rip = ret_addr; + self.iret.rflags &= !(1 << 18); + } +} diff --git a/src/arch/x86_64/interrupt/ipi.rs b/src/arch/x86_64/interrupt/ipi.rs deleted file mode 100644 index 18113c25..00000000 --- a/src/arch/x86_64/interrupt/ipi.rs +++ /dev/null @@ -1,30 +0,0 @@ -use core::sync::atomic::Ordering; -use x86::shared::tlb; - -use crate::context; -use crate::device::local_apic::LOCAL_APIC; -use super::irq::PIT_TICKS; - -interrupt!(wakeup, { - LOCAL_APIC.eoi(); -}); - -interrupt!(tlb, { - LOCAL_APIC.eoi(); - - tlb::flush_all(); -}); - -interrupt!(switch, { - LOCAL_APIC.eoi(); - - let _ = context::switch(); -}); - -interrupt!(pit, { - LOCAL_APIC.eoi(); - - if PIT_TICKS.fetch_add(1, Ordering::SeqCst) >= 10 { - let _ = context::switch(); - } -}); diff --git a/src/arch/x86_64/interrupt/irq.rs b/src/arch/x86_64/interrupt/irq.rs index 8c755baa..0b10a327 100644 --- a/src/arch/x86_64/interrupt/irq.rs +++ b/src/arch/x86_64/interrupt/irq.rs @@ -1,57 +1,170 @@ use core::sync::atomic::{AtomicUsize, Ordering}; -use crate::context::timeout; -use crate::device::pic; -use crate::device::serial::{COM1, COM2}; -use crate::ipi::{ipi, IpiKind, IpiTarget}; -use crate::scheme::debug::debug_input; -use crate::{context, ptrace, time}; +use alloc::vec::Vec; -//resets to 0 in context::switch() -pub static PIT_TICKS: AtomicUsize = AtomicUsize::new(0); +#[cfg(feature = "sys_stat")] +use crate::percpu::PercpuBlock; +use crate::{ + context::{self, timeout}, + device::{ + ioapic, local_apic, pic, pit, + serial::{COM1, COM2}, + }, + interrupt, interrupt_stack, + ipi::{ipi, IpiKind, IpiTarget}, + scheme::{ + debug::{debug_input, debug_notify}, + serio::serio_input, + }, + time, +}; -unsafe fn trigger(irq: u8) { - extern { - fn irq_trigger(irq: u8); +#[repr(u8)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum IrqMethod { + Pic = 0, + Apic = 1, +} + +static SPURIOUS_COUNT_IRQ7: AtomicUsize = AtomicUsize::new(0); +static SPURIOUS_COUNT_IRQ15: AtomicUsize = AtomicUsize::new(0); + +pub fn spurious_count_irq7() -> usize { + SPURIOUS_COUNT_IRQ7.load(Ordering::Relaxed) +} +pub fn spurious_count_irq15() -> usize { + SPURIOUS_COUNT_IRQ15.load(Ordering::Relaxed) +} +pub fn spurious_count() -> usize { + spurious_count_irq7() + spurious_count_irq15() +} +pub fn spurious_irq_resource() -> syscall::Result> { + match irq_method() { + IrqMethod::Apic => Ok(Vec::from(&b"(not implemented for APIC yet)"[..])), + IrqMethod::Pic => Ok(format!( + "{}\tIRQ7\n{}\tIRQ15\n{}\ttotal\n", + spurious_count_irq7(), + spurious_count_irq15(), + spurious_count() + ) + .into_bytes()), } +} - if irq < 16 { - if irq >= 8 { - pic::SLAVE.mask_set(irq - 8); - pic::MASTER.ack(); - pic::SLAVE.ack(); - } else { - pic::MASTER.mask_set(irq); - pic::MASTER.ack(); - } +static IRQ_METHOD: AtomicUsize = AtomicUsize::new(IrqMethod::Pic as usize); + +pub fn set_irq_method(method: IrqMethod) { + IRQ_METHOD.store(method as usize, core::sync::atomic::Ordering::Release); +} + +fn irq_method() -> IrqMethod { + let raw = IRQ_METHOD.load(core::sync::atomic::Ordering::Acquire); + + match raw { + 0 => IrqMethod::Pic, + 1 => IrqMethod::Apic, + _ => unreachable!(), } +} + +extern "C" { + // triggers irq scheme + fn irq_trigger(irq: u8); +} +/// Notify the IRQ scheme that an IRQ has been registered. 
This should mask the IRQ until the +/// scheme user unmasks it ("acknowledges" it). +unsafe fn trigger(irq: u8) { + match irq_method() { + IrqMethod::Pic => { + if irq < 16 { + pic_mask(irq) + } + } + IrqMethod::Apic => ioapic_mask(irq), + } irq_trigger(irq); } +/// Unmask the IRQ. This is called from the IRQ scheme, which does this when a user process has +/// processed the IRQ. pub unsafe fn acknowledge(irq: usize) { - if irq < 16 { - if irq >= 8 { - pic::SLAVE.mask_clear(irq as u8 - 8); - } else { - pic::MASTER.mask_clear(irq as u8); + match irq_method() { + IrqMethod::Pic => { + if irq < 16 { + pic_unmask(irq) + } } + IrqMethod::Apic => ioapic_unmask(irq), } } -interrupt_stack!(pit, stack, { - // Saves CPU time by not sending IRQ event irq_trigger(0); +/// Sends an end-of-interrupt, so that the interrupt controller can go on to the next one. +pub unsafe fn eoi(irq: u8) { + #[cfg(feature = "sys_stat")] + PercpuBlock::current().stats.add_irq(irq); + + match irq_method() { + IrqMethod::Pic => { + if irq < 16 { + pic_eoi(irq) + } + } + IrqMethod::Apic => lapic_eoi(), + } +} - const PIT_RATE: u64 = 2_250_286; +unsafe fn pic_mask(irq: u8) { + debug_assert!(irq < 16); + + if irq >= 8 { + pic::slave().mask_set(irq - 8); + } else { + pic::master().mask_set(irq); + } +} + +unsafe fn ioapic_mask(irq: u8) { + ioapic::mask(irq); +} + +unsafe fn pic_eoi(irq: u8) { + debug_assert!(irq < 16); + + if irq >= 8 { + pic::master().ack(); + pic::slave().ack(); + } else { + pic::master().ack(); + } +} + +unsafe fn lapic_eoi() { + local_apic::the_local_apic().eoi() +} + +unsafe fn pic_unmask(irq: usize) { + debug_assert!(irq < 16); + + if irq >= 8 { + pic::slave().mask_clear(irq as u8 - 8); + } else { + pic::master().mask_clear(irq as u8); + } +} + +unsafe fn ioapic_unmask(irq: usize) { + ioapic::unmask(irq as u8); +} + +interrupt_stack!(pit_stack, |_stack| { + // Saves CPU time by not sending IRQ event irq_trigger(0); { - let mut offset = time::OFFSET.lock(); - let sum = offset.1 + PIT_RATE; - offset.1 = sum % 1_000_000_000; - offset.0 += sum / 1_000_000_000; + *time::OFFSET.lock() += pit::RATE; } - pic::MASTER.ack(); + eoi(0); // Wake up other CPUs ipi(IpiKind::Pit, IpiTarget::Other); @@ -59,75 +172,152 @@ interrupt_stack!(pit, stack, { // Any better way of doing this? timeout::trigger(); - if PIT_TICKS.fetch_add(1, Ordering::SeqCst) >= 10 { - let _guard = ptrace::set_process_regs(stack); - let _ = context::switch(); - } + // Switch after a sufficient amount of time since the last switch. + context::switch::tick(); }); -interrupt!(keyboard, { - trigger(1); +interrupt!(keyboard, || { + let data: u8; + core::arch::asm!("in al, 0x60", out("al") data); + + eoi(1); + + serio_input(0, data); }); -interrupt!(cascade, { +interrupt!(cascade, || { // No need to do any operations on cascade - pic::MASTER.ack(); + eoi(2); }); -interrupt!(com2, { +interrupt!(com2, || { while let Some(c) = COM2.lock().receive() { debug_input(c); } - pic::MASTER.ack(); + debug_notify(); + eoi(3); }); -interrupt!(com1, { +interrupt!(com1, || { while let Some(c) = COM1.lock().receive() { debug_input(c); } - pic::MASTER.ack(); + debug_notify(); + eoi(4); }); -interrupt!(lpt2, { +interrupt!(lpt2, || { trigger(5); + eoi(5); }); -interrupt!(floppy, { +interrupt!(floppy, || { trigger(6); + eoi(6); }); -interrupt!(lpt1, { +interrupt!(lpt1, || { + if irq_method() == IrqMethod::Pic && pic::master().isr() & (1 << 7) == 0 { + // the IRQ was spurious, ignore it but increment a counter. 
+ SPURIOUS_COUNT_IRQ7.fetch_add(1, Ordering::Relaxed); + return; + } trigger(7); + eoi(7); }); -interrupt!(rtc, { +interrupt!(rtc, || { trigger(8); + eoi(8); }); -interrupt!(pci1, { +interrupt!(pci1, || { trigger(9); + eoi(9); }); -interrupt!(pci2, { +interrupt!(pci2, || { trigger(10); + eoi(10); }); -interrupt!(pci3, { +interrupt!(pci3, || { trigger(11); + eoi(11); }); -interrupt!(mouse, { - trigger(12); +interrupt!(mouse, || { + let data: u8; + core::arch::asm!("in al, 0x60", out("al") data); + + eoi(12); + + serio_input(1, data); }); -interrupt!(fpu, { +interrupt!(fpu, || { trigger(13); + eoi(13); }); -interrupt!(ata1, { +interrupt!(ata1, || { trigger(14); + eoi(14); }); -interrupt!(ata2, { +interrupt!(ata2, || { + if irq_method() == IrqMethod::Pic && pic::slave().isr() & (1 << 7) == 0 { + SPURIOUS_COUNT_IRQ15.fetch_add(1, Ordering::Relaxed); + pic::master().ack(); + return; + } trigger(15); + eoi(15); }); + +interrupt!(lapic_timer, || { + println!("Local apic timer interrupt"); + lapic_eoi(); +}); +#[cfg(feature = "profiling")] +interrupt!(aux_timer, || { + lapic_eoi(); + crate::ipi::ipi(IpiKind::Profile, IpiTarget::Other); +}); + +interrupt!(lapic_error, || { + log::error!( + "Local apic internal error: ESR={:#0x}", + local_apic::the_local_apic().esr() + ); + lapic_eoi(); +}); + +interrupt_error!(generic_irq, |_stack, code| { + // The reason why 128 is subtracted and added from the code, is that PUSH imm8 sign-extends the + // value, and the longer PUSH imm32 would make the generic_interrupts table twice as large + // (containing lots of useless NOPs). + irq_trigger((code as i32).wrapping_add(128) as u8); + + lapic_eoi(); +}); + +core::arch::global_asm!(" + .globl __generic_interrupts_start + .globl __generic_interrupts_end + .p2align 3 +__generic_interrupts_start: + n = 0 + .rept 224 + push (n - 128) + jmp {} + .p2align 3 + n = n + 1 + .endr +__generic_interrupts_end: +", sym generic_irq); + +extern "C" { + pub fn __generic_interrupts_start(); + pub fn __generic_interrupts_end(); +} diff --git a/src/arch/x86_64/interrupt/mod.rs b/src/arch/x86_64/interrupt/mod.rs index 85686d96..430961dd 100644 --- a/src/arch/x86_64/interrupt/mod.rs +++ b/src/arch/x86_64/interrupt/mod.rs @@ -1,54 +1,12 @@ //! Interrupt instructions +pub use crate::arch::x86_shared::interrupt::*; + +#[macro_use] +pub mod handler; + pub mod exception; -pub mod ipi; pub mod irq; pub mod syscall; -pub mod trace; - -pub use self::trace::stack_trace; - -/// Clear interrupts -#[inline(always)] -pub unsafe fn disable() { - asm!("cli" : : : : "intel", "volatile"); -} - -/// Set interrupts -#[inline(always)] -pub unsafe fn enable() { - asm!("sti" : : : : "intel", "volatile"); -} - -/// Set interrupts and halt -/// This will atomically wait for the next interrupt -/// Performing enable followed by halt is not guaranteed to be atomic, use this instead! -#[inline(always)] -pub unsafe fn enable_and_halt() { - asm!("sti - hlt" - : : : : "intel", "volatile"); -} - -/// Set interrupts and nop -/// This will enable interrupts and allow the IF flag to be processed -/// Simply enabling interrupts does not gurantee that they will trigger, use this instead! 
-#[inline(always)] -pub unsafe fn enable_and_nop() { - asm!("sti - nop" - : : : : "intel", "volatile"); -} - -/// Halt instruction -#[inline(always)] -pub unsafe fn halt() { - asm!("hlt" : : : : "intel", "volatile"); -} -/// Pause instruction -/// Safe because it is similar to a NOP, and has no memory effects -#[inline(always)] -pub fn pause() { - unsafe { asm!("pause" : : : : "intel", "volatile"); } -} +pub use self::handler::InterruptStack; diff --git a/src/arch/x86_64/interrupt/syscall.rs b/src/arch/x86_64/interrupt/syscall.rs index 6b9dfe61..82890853 100644 --- a/src/arch/x86_64/interrupt/syscall.rs +++ b/src/arch/x86_64/interrupt/syscall.rs @@ -1,150 +1,197 @@ -use crate::arch::macros::InterruptStack; -use crate::arch::{gdt, pti}; -use crate::{ptrace, syscall}; -use x86::shared::msr; +use crate::{ + arch::{gdt, interrupt::InterruptStack}, + ptrace, syscall, + syscall::flag::{PTRACE_FLAG_IGNORE, PTRACE_STOP_POST_SYSCALL, PTRACE_STOP_PRE_SYSCALL}, +}; +use core::mem::offset_of; +use x86::{ + bits64::{rflags::RFlags, task::TaskStateSegment}, + msr, + segmentation::SegmentSelector, +}; pub unsafe fn init() { - msr::wrmsr(msr::IA32_STAR, ((gdt::GDT_KERNEL_CODE as u64) << 3) << 32); + // IA32_STAR[31:0] are reserved. + + // The base selector of the two consecutive segments for kernel code and the immediately + // suceeding stack (data). + let syscall_cs_ss_base = (gdt::GDT_KERNEL_CODE as u16) << 3; + // The base selector of the three consecutive segments (of which two are used) for user code + // and user data. It points to a 32-bit code segment, which must be followed by a data segment + // (stack), and a 64-bit code segment. + let sysret_cs_ss_base = ((gdt::GDT_USER_CODE32_UNUSED as u16) << 3) | 3; + let star_high = u32::from(syscall_cs_ss_base) | (u32::from(sysret_cs_ss_base) << 16); + + msr::wrmsr(msr::IA32_STAR, u64::from(star_high) << 32); msr::wrmsr(msr::IA32_LSTAR, syscall_instruction as u64); - msr::wrmsr(msr::IA32_FMASK, 0x0300); // Clear trap flag and interrupt enable - msr::wrmsr(msr::IA32_KERNEL_GS_BASE, &gdt::TSS as *const _ as u64); + + // DF needs to be cleared, required by the compiler ABI. If DF were not part of FMASK, + // userspace would be able to reverse the direction of in-kernel REP MOVS/STOS/(CMPS/SCAS), and + // cause all sorts of memory corruption. + // + // IF needs to be cleared, as the kernel currently assumes interrupts are disabled except in + // usermode and in kmain. + // + // TF needs to be cleared, as enabling userspace-rflags-controlled singlestep in the kernel + // would be a bad idea. + // + // AC it should always be cleared when entering the kernel (and never be set except in usercopy + // functions), if for some reason AC was set before entering userspace (AC can only be modified + // by kernel code). + // + // The other flags could indeed be preserved and excluded from FMASK, but since they are not + // used to pass data to the kernel, they might as well be masked with *marginal* security + // benefits. + // + // Flags not included here are IOPL (not relevant to the kernel at all), "CPUID flag" (not used + // at all in 64-bit mode), RF (not used yet, but DR breakpoints would remain enabled both in + // user and kernel mode), VM8086 (not used at all), and VIF/VIP (system-level status flags?). 
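    // Editorial aside, not part of the original change: a minimal sanity check of the mask built
    // below, using only the architectural RFLAGS bit positions (CF=0, PF=2, AF=4, ZF=6, SF=7,
    // TF=8, IF=9, DF=10, OF=11, AC=18). On SYSCALL the CPU copies RFLAGS into R11 and then clears
    // every bit set in IA32_FMASK, so this value ensures the kernel entry path always starts with
    // DF, IF, TF and AC (plus the arithmetic flags) cleared.
    const _FMASK_SANITY: () = assert!(
        ((1u64 << 10) | (1 << 9) | (1 << 8) | (1 << 18)             // DF, IF, TF, AC
            | (1 << 0) | (1 << 2) | (1 << 4) | (1 << 6) | (1 << 7)  // CF, PF, AF, ZF, SF
            | (1 << 11))                                            // OF
            == 0x4_0FD5
    );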
+ + let mask_critical = RFlags::FLAGS_DF | RFlags::FLAGS_IF | RFlags::FLAGS_TF | RFlags::FLAGS_AC; + let mask_other = RFlags::FLAGS_CF + | RFlags::FLAGS_PF + | RFlags::FLAGS_AF + | RFlags::FLAGS_ZF + | RFlags::FLAGS_SF + | RFlags::FLAGS_OF; + msr::wrmsr(msr::IA32_FMASK, (mask_critical | mask_other).bits()); let efer = msr::rdmsr(msr::IA32_EFER); msr::wrmsr(msr::IA32_EFER, efer | 1); } -// Not a function pointer because it somehow messes up the returning -// from clone() (via clone_ret()). Not sure what the problem is. -macro_rules! with_interrupt_stack { - (unsafe fn $wrapped:ident($stack:ident) -> usize $code:block) => { - #[inline(never)] - unsafe fn $wrapped(stack: *mut InterruptStack) { - let _guard = ptrace::set_process_regs(stack); - - let is_sysemu = ptrace::breakpoint_callback(syscall::flag::PTRACE_SYSCALL) - .map(|fl| fl & syscall::flag::PTRACE_SYSEMU == syscall::flag::PTRACE_SYSEMU); - if !is_sysemu.unwrap_or(false) { - // If not on a sysemu breakpoint - let $stack = &mut *stack; - $stack.scratch.rax = $code; - - if is_sysemu.is_some() { - // Only callback if there was a pre-syscall - // callback too. - ptrace::breakpoint_callback(::syscall::PTRACE_SYSCALL); - } - } - } - } -} - -#[naked] -pub unsafe extern fn syscall_instruction() { - with_interrupt_stack! { - unsafe fn inner(stack) -> usize { - let rbp; - asm!("" : "={rbp}"(rbp) : : : "intel", "volatile"); - - let scratch = &stack.scratch; - syscall::syscall(scratch.rax, scratch.rdi, scratch.rsi, scratch.rdx, scratch.r10, scratch.r8, rbp, stack) - } +#[no_mangle] +pub unsafe extern "C" fn __inner_syscall_instruction(stack: *mut InterruptStack) { + let allowed = ptrace::breakpoint_callback(PTRACE_STOP_PRE_SYSCALL, None) + .and_then(|_| ptrace::next_breakpoint().map(|f| !f.contains(PTRACE_FLAG_IGNORE))); + + if allowed.unwrap_or(true) { + let scratch = &(*stack).scratch; + + let ret = syscall::syscall( + scratch.rax, + scratch.rdi, + scratch.rsi, + scratch.rdx, + scratch.r10, + scratch.r8, + ); + (*stack).scratch.rax = ret; } - // Yes, this is magic. No, you don't need to understand - asm!(" - swapgs // Set gs segment to TSS - mov gs:[28], rsp // Save userspace rsp - mov rsp, gs:[4] // Load kernel rsp - push 5 * 8 + 3 // Push userspace data segment - push qword ptr gs:[28] // Push userspace rsp - mov qword ptr gs:[28], 0 // Clear userspace rsp - push r11 // Push rflags - push 4 * 8 + 3 // Push userspace code segment - push rcx // Push userspace return pointer - swapgs // Restore gs - " - : - : - : - : "intel", "volatile"); - - // Push scratch registers - scratch_push!(); - preserved_push!(); - asm!("push fs - mov r11, 0x18 - mov fs, r11" - : : : : "intel", "volatile"); - - // Get reference to stack variables - let rsp: usize; - asm!("" : "={rsp}"(rsp) : : : "intel", "volatile"); - - // Map kernel - pti::map(); - - inner(rsp as *mut InterruptStack); - - // Unmap kernel - pti::unmap(); - - // Interrupt return - asm!("pop fs" : : : : "intel", "volatile"); - preserved_pop!(); - scratch_pop!(); - asm!("iretq" : : : : "intel", "volatile"); + ptrace::breakpoint_callback(PTRACE_STOP_POST_SYSCALL, None); } #[naked] -pub unsafe extern fn syscall() { - with_interrupt_stack! 
{ - unsafe fn inner(stack) -> usize { - let rbp; - asm!("" : "={rbp}"(rbp) : : : "intel", "volatile"); - - let scratch = &stack.scratch; - syscall::syscall(scratch.rax, stack.preserved.rbx, scratch.rcx, scratch.rdx, scratch.rsi, scratch.rdi, rbp, stack) - } - } - - // Push scratch registers - scratch_push!(); - preserved_push!(); - asm!("push fs - mov r11, 0x18 - mov fs, r11" - : : : : "intel", "volatile"); - - // Get reference to stack variables - let rsp: usize; - asm!("" : "={rsp}"(rsp) : : : "intel", "volatile"); - - // Map kernel - pti::map(); - - inner(rsp as *mut InterruptStack); - - // Unmap kernel - pti::unmap(); - - // Interrupt return - asm!("pop fs" : : : : "intel", "volatile"); - preserved_pop!(); - scratch_pop!(); - asm!("iretq" : : : : "intel", "volatile"); +#[allow(named_asm_labels)] +pub unsafe extern "C" fn syscall_instruction() { + core::arch::naked_asm!(concat!( + // Yes, this is magic. No, you don't need to understand + "swapgs;", // Swap KGSBASE with GSBASE, allowing fast TSS access. + "mov gs:[{sp}], rsp;", // Save userspace stack pointer + "mov rsp, gs:[{ksp}];", // Load kernel stack pointer + "push QWORD PTR {ss_sel};", // Push fake userspace SS (resembling iret frame) + "push QWORD PTR gs:[{sp}];", // Push userspace rsp + "push r11;", // Push rflags + "push QWORD PTR {cs_sel};", // Push fake CS (resembling iret stack frame) + "push rcx;", // Push userspace return pointer + + // Push context registers + "push rax;", + push_scratch!(), + push_preserved!(), + + // TODO: Map PTI + // $crate::arch::x86_64::pti::map(); + + // Call inner funtion + "mov rdi, rsp;", + "call __inner_syscall_instruction;", + + // TODO: Unmap PTI + // $crate::arch::x86_64::pti::unmap(); + + " + .globl enter_usermode + enter_usermode: + ", + + // Pop context registers + pop_preserved!(), + pop_scratch!(), + + // Restore user GSBASE by swapping GSBASE and KGSBASE. + "swapgs;", + + // TODO: Should we unconditionally jump or avoid jumping, to hint to the branch predictor that + // singlestep is NOT set? + // + // It appears Intel CPUs assume (previously unknown) forward conditional branches to not be + // taken, and AMD appears to assume all previously unknown conditional branches will not be + // taken. + + // Check if the Trap Flag (singlestep flag) is set. If so, sysretq will return to before the + // instruction, whereas debuggers expect the iretq behavior of returning to after the + // instruction. + + // TODO: Which one is faster? + // bt DWORD PTR [rsp + 16], 8 + // or, + // bt BYTE PTR [rsp + 17], 0 + // or, + // test BYTE PTR [rsp + 17], 1 + // or, + // test WORD PTR [rsp + 16], 0x100 + // or, + // test DWORD PTR [rsp + 16], 0x100 + // ? + + "test BYTE PTR [rsp + 17], 1;", + // If set, return using IRETQ instead. + "jnz 2f;", + + // Otherwise, continue with the fast sysretq. + + // Pop userspace return pointer + "pop rcx;", + + // We must ensure RCX is canonical; if it is not when running sysretq, the consequences can be + // fatal from a security perspective. + // + // See https://xenproject.org/2012/06/13/the-intel-sysret-privilege-escalation/. + // + // This is not just theoretical; ptrace allows userspace to change RCX (via RIP) of target + // processes. + // + // While we could also conditionally IRETQ here, an easier method is to simply sign-extend RCX: + + // Shift away the upper 16 bits (0xBAAD_8000_DEAD_BEEF => 0x8000_DEAD_BEEF_XXXX). 
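    // Editorial aside, not part of the original change: the two shifts below are the standard
    // sign-extension trick. In plain Rust the same canonicalization would read roughly
    //     ((rcx as i64) << 16 >> 16) as u64
    // which forces bits 63:48 to copies of bit 47; for the example value used in these comments,
    //     ((0xBAAD_8000_DEAD_BEEF_u64 as i64) << 16 >> 16) as u64 == 0xFFFF_8000_DEAD_BEEF
    // so a non-canonical return address (e.g. one set via ptrace) is forced canonical before
    // SYSRETQ runs, instead of triggering the ring-0 #GP condition described in the linked
    // article.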
+ "shl rcx, 16;", + // Shift arithmetically right by 16 bits, effectively extending the 47th sign bit to bits + // 63:48 (0x8000_DEAD_BEEF_XXXX => 0xFFFF_8000_DEAD_BEEF). + "sar rcx, 16;", + + "add rsp, 8;", // Pop fake userspace CS + "pop r11;", // Pop rflags + "pop rsp;", // Restore userspace stack pointer + "sysretq;", // Return into userspace; RCX=>RIP,R11=>RFLAGS + + // IRETQ fallback: + " + .p2align 4 +2: + xor rcx, rcx + xor r11, r11 + iretq + "), + + sp = const(offset_of!(gdt::ProcessorControlRegion, user_rsp_tmp)), + ksp = const(offset_of!(gdt::ProcessorControlRegion, tss) + offset_of!(TaskStateSegment, rsp)), + ss_sel = const(SegmentSelector::new(gdt::GDT_USER_DATA as u16, x86::Ring::Ring3).bits()), + cs_sel = const(SegmentSelector::new(gdt::GDT_USER_CODE as u16, x86::Ring::Ring3).bits()), + ); } - -#[naked] -pub unsafe extern "C" fn clone_ret() { - // The C x86_64 ABI specifies that rbp is pushed to save the old - // call frame. Popping rbp means we're using the parent's call - // frame and thus will not only return from this function but also - // from the function above this one. - // When this is called, the stack should have been - // interrupt->inner->syscall->clone - // then changed to - // interrupt->inner->clone_ret->clone - // so this will return from "inner". - - asm!("pop rbp" : : : : "intel", "volatile"); +extern "C" { + // TODO: macro? + pub fn enter_usermode(); } diff --git a/src/arch/x86_64/interrupt/trace.rs b/src/arch/x86_64/interrupt/trace.rs deleted file mode 100644 index 07d465e9..00000000 --- a/src/arch/x86_64/interrupt/trace.rs +++ /dev/null @@ -1,90 +0,0 @@ -use core::{mem, str}; -use goblin::elf::sym; -use rustc_demangle::demangle; - -use crate::paging::{ActivePageTable, VirtualAddress}; - -/// Get a stack trace -//TODO: Check for stack being mapped before dereferencing -#[inline(never)] -pub unsafe fn stack_trace() { - let mut rbp: usize; - asm!("" : "={rbp}"(rbp) : : : "intel", "volatile"); - - println!("TRACE: {:>016X}", rbp); - //Maximum 64 frames - let active_table = ActivePageTable::new(); - for _frame in 0..64 { - if let Some(rip_rbp) = rbp.checked_add(mem::size_of::()) { - if active_table.translate(VirtualAddress::new(rbp)).is_some() && active_table.translate(VirtualAddress::new(rip_rbp)).is_some() { - let rip = *(rip_rbp as *const usize); - if rip == 0 { - println!(" {:>016X}: EMPTY RETURN", rbp); - break; - } - println!(" {:>016X}: {:>016X}", rbp, rip); - rbp = *(rbp as *const usize); - symbol_trace(rip); - } else { - println!(" {:>016X}: GUARD PAGE", rbp); - break; - } - } else { - println!(" {:>016X}: RBP OVERFLOW", rbp); - break; - } - } -} - -/// Get a symbol -//TODO: Do not create Elf object for every symbol lookup -#[inline(never)] -pub unsafe fn symbol_trace(addr: usize) { - use core::slice; - use core::sync::atomic::Ordering; - - use crate::elf::Elf; - use crate::start::{KERNEL_BASE, KERNEL_SIZE}; - - let kernel_ptr = (KERNEL_BASE.load(Ordering::SeqCst) + crate::KERNEL_OFFSET) as *const u8; - let kernel_slice = slice::from_raw_parts(kernel_ptr, KERNEL_SIZE.load(Ordering::SeqCst)); - if let Ok(elf) = Elf::from(kernel_slice) { - let mut strtab_opt = None; - for section in elf.sections() { - if section.sh_type == ::goblin::elf::section_header::SHT_STRTAB { - strtab_opt = Some(section); - break; - } - } - - if let Some(symbols) = elf.symbols() { - for sym in symbols { - if sym::st_type(sym.st_info) == sym::STT_FUNC - && addr >= sym.st_value as usize - && addr < (sym.st_value + sym.st_size) as usize - { - println!(" {:>016X}+{:>04X}", 
sym.st_value, addr - sym.st_value as usize); - - if let Some(strtab) = strtab_opt { - let start = strtab.sh_offset as usize + sym.st_name as usize; - let mut end = start; - while end < elf.data.len() { - let b = elf.data[end]; - end += 1; - if b == 0 { - break; - } - } - - if end > start { - let sym_slice = &elf.data[start .. end - 1]; - if let Ok(sym_name) = str::from_utf8(sym_slice) { - println!(" {:#}", demangle(sym_name)); - } - } - } - } - } - } - } -} diff --git a/src/arch/x86_64/macros.rs b/src/arch/x86_64/macros.rs index 5ad34d8d..5b165f90 100644 --- a/src/arch/x86_64/macros.rs +++ b/src/arch/x86_64/macros.rs @@ -1,6 +1,3 @@ -use core::mem; -use syscall::data::IntRegisters; - /// Print to console #[macro_export] macro_rules! print { @@ -18,375 +15,83 @@ macro_rules! println { ($fmt:expr, $($arg:tt)*) => (print!(concat!($fmt, "\n"), $($arg)*)); } -#[allow(dead_code)] -#[repr(packed)] -pub struct ScratchRegisters { - pub r11: usize, - pub r10: usize, - pub r9: usize, - pub r8: usize, - pub rsi: usize, - pub rdi: usize, - pub rdx: usize, - pub rcx: usize, - pub rax: usize, -} - -impl ScratchRegisters { - pub fn dump(&self) { - println!("RAX: {:>016X}", { self.rax }); - println!("RCX: {:>016X}", { self.rcx }); - println!("RDX: {:>016X}", { self.rdx }); - println!("RDI: {:>016X}", { self.rdi }); - println!("RSI: {:>016X}", { self.rsi }); - println!("R8: {:>016X}", { self.r8 }); - println!("R9: {:>016X}", { self.r9 }); - println!("R10: {:>016X}", { self.r10 }); - println!("R11: {:>016X}", { self.r11 }); +macro_rules! expand_bool( + ($value:expr) => { + concat!($value) } -} - -macro_rules! scratch_push { - () => (asm!( - "push rax - push rcx - push rdx - push rdi - push rsi - push r8 - push r9 - push r10 - push r11" - : : : : "intel", "volatile" - )); -} - -macro_rules! scratch_pop { - () => (asm!( - "pop r11 - pop r10 - pop r9 - pop r8 - pop rsi - pop rdi - pop rdx - pop rcx - pop rax" - : : : : "intel", "volatile" - )); -} - -#[allow(dead_code)] -#[repr(packed)] -pub struct PreservedRegisters { - pub r15: usize, - pub r14: usize, - pub r13: usize, - pub r12: usize, - pub rbp: usize, - pub rbx: usize, -} +); -impl PreservedRegisters { - pub fn dump(&self) { - println!("RBX: {:>016X}", { self.rbx }); - println!("RBP: {:>016X}", { self.rbp }); - println!("R12: {:>016X}", { self.r12 }); - println!("R13: {:>016X}", { self.r13 }); - println!("R14: {:>016X}", { self.r14 }); - println!("R15: {:>016X}", { self.r15 }); +macro_rules! alternative( + (feature: $feature:literal, then: [$($then:expr),*], default: [$($default:expr),*]) => { + alternative2!(feature1: $feature, then1: [$($then),*], feature2: "", then2: [""], default: [$($default),*]) } -} - -macro_rules! preserved_push { - () => (asm!( - "push rbx - push rbp - push r12 - push r13 - push r14 - push r15" - : : : : "intel", "volatile" - )); -} - -macro_rules! preserved_pop { - () => (asm!( - "pop r15 - pop r14 - pop r13 - pop r12 - pop rbp - pop rbx" - : : : : "intel", "volatile" - )); -} - -macro_rules! fs_push { - () => (asm!( - "push fs - mov rax, 0x18 - mov fs, ax" - : : : : "intel", "volatile" - )); -} - -macro_rules! 
fs_pop { - () => (asm!( - "pop fs" - : : : : "intel", "volatile" - )); -} - -#[allow(dead_code)] -#[repr(packed)] -pub struct IretRegisters { - pub rip: usize, - pub cs: usize, - pub rflags: usize, - // Will only be present if interrupt is raised from another - // privilege ring - pub rsp: usize, - pub ss: usize -} - -impl IretRegisters { - pub fn dump(&self) { - println!("RFLAG: {:>016X}", { self.rflags }); - println!("CS: {:>016X}", { self.cs }); - println!("RIP: {:>016X}", { self.rip }); - } -} - -macro_rules! iret { - () => (asm!( - "iretq" - : : : : "intel", "volatile" - )); -} - -/// Create an interrupt function that can safely run rust code -#[macro_export] -macro_rules! interrupt { - ($name:ident, $func:block) => { - #[naked] - pub unsafe extern fn $name () { - #[inline(never)] - unsafe fn inner() { - $func - } - - // Push scratch registers - scratch_push!(); - fs_push!(); - - // Map kernel - $crate::arch::x86_64::pti::map(); - - // Call inner rust function - inner(); - - // Unmap kernel - $crate::arch::x86_64::pti::unmap(); - - // Pop scratch registers and return - fs_pop!(); - scratch_pop!(); - iret!(); - } +); +macro_rules! saturating_sub( + ($lhs:literal, $rhs:literal) => { concat!( + "((", $lhs, ")>(", $rhs, "))*((", $lhs, ")-(", $rhs, "))", + ) } +); +// Use feature1 if present, otherwise try using feature2, otherwise use default. +// +// cpu_feature_always simply means it is always enabled. Thus, if feature2, which has lower +// priority, is "always" but feature1 is "auto", feature2 will still be checked for, and feature2 +// will become the fallback code. +// +// An empty string as feature is equivalent with "never". +macro_rules! alternative2( + (feature1: $feature1:literal, then1: [$($then1:expr),*], feature2: $feature2:literal, then2: [$($then2:expr),*], default: [$($default:expr),*]) => { + concat!(" + .set true, 1 + .set false, 0 + 40: + .if ", expand_bool!(cfg!(cpu_feature_always = $feature1)), " + ", $($then1,)* " + .elseif ", expand_bool!(cfg!(cpu_feature_always = $feature2)), " + ", $($then2,)* " + .else + ", $($default,)* " + .endif + 42: + .if ", expand_bool!(cfg!(cpu_feature_auto = $feature1)), " + .skip -", saturating_sub!("51f - 50f", "42b - 40b"), ", 0x90 + .endif + .if ", expand_bool!(cfg!(cpu_feature_auto = $feature2)), " + .skip -", saturating_sub!("61f - 60f", "42b - 40b"), ", 0x90 + .endif + 41: + ", + // FIXME: The assembler apparently complains "invalid number of bytes" despite it being + // quite obvious what saturating_sub does. + + // Declare them in reverse order. Last relocation wins! + alternative_auto!("6", $feature2, [$($then2),*]), + alternative_auto!("5", $feature1, [$($then1),*]), + ) }; -} - -#[allow(dead_code)] -#[repr(packed)] -pub struct InterruptStack { - pub fs: usize, - pub preserved: PreservedRegisters, - pub scratch: ScratchRegisters, - pub iret: IretRegisters, -} - -impl InterruptStack { - pub fn dump(&self) { - self.iret.dump(); - self.scratch.dump(); - self.preserved.dump(); - println!("FS: {:>016X}", { self.fs }); - } - /// Saves all registers to a struct used by the proc: - /// scheme to read/write registers. 
- pub fn save(&self, all: &mut IntRegisters) { - all.fs = self.fs; - - all.r15 = self.preserved.r15; - all.r14 = self.preserved.r14; - all.r13 = self.preserved.r13; - all.r12 = self.preserved.r12; - all.rbp = self.preserved.rbp; - all.rbx = self.preserved.rbx; - all.r11 = self.scratch.r11; - all.r10 = self.scratch.r10; - all.r9 = self.scratch.r9; - all.r8 = self.scratch.r8; - all.rsi = self.scratch.rsi; - all.rdi = self.scratch.rdi; - all.rdx = self.scratch.rdx; - all.rcx = self.scratch.rcx; - all.rax = self.scratch.rax; - all.rip = self.iret.rip; - all.cs = self.iret.cs; - all.rflags = self.iret.rflags; - - // Set rsp and ss: - - const CPL_MASK: usize = 0b11; - - let cs: usize; - unsafe { - asm!("mov $0, cs" : "=r"(cs) ::: "intel"); - } - - if self.iret.cs & CPL_MASK == cs & CPL_MASK { - // Privilege ring didn't change, so neither did the stack - all.rsp = self as *const Self as usize // rsp after Self was pushed to the stack - + mem::size_of::() // disregard Self - - mem::size_of::() * 2; // well, almost: rsp and ss need to be excluded as they aren't present - unsafe { - asm!("mov $0, ss" : "=r"(all.ss) ::: "intel"); - } - } else { - all.rsp = self.iret.rsp; - all.ss = self.iret.ss; - } - } - /// Loads all registers from a struct used by the proc: - /// scheme to read/write registers. - pub fn load(&mut self, all: &IntRegisters) { - // TODO: Which of these should be allowed to change? - - // self.fs = all.fs; - self.preserved.r15 = all.r15; - self.preserved.r14 = all.r14; - self.preserved.r13 = all.r13; - self.preserved.r12 = all.r12; - self.preserved.rbp = all.rbp; - self.preserved.rbx = all.rbx; - self.scratch.r11 = all.r11; - self.scratch.r10 = all.r10; - self.scratch.r9 = all.r9; - self.scratch.r8 = all.r8; - self.scratch.rsi = all.rsi; - self.scratch.rdi = all.rdi; - self.scratch.rdx = all.rdx; - self.scratch.rcx = all.rcx; - self.scratch.rax = all.rax; - self.iret.rip = all.rip; - self.iret.cs = all.cs; - // self.iret.rflags = all.eflags; - } - /// Enables the "Trap Flag" in the FLAGS register, causing the CPU - /// to send a Debug exception after the next instruction. This is - /// used for singlestep in the proc: scheme. - pub fn set_singlestep(&mut self, enabled: bool) { - if enabled { - self.iret.rflags |= 1 << 8; - } else { - self.iret.rflags &= !(1 << 8); - } - } -} - -#[macro_export] -macro_rules! 
interrupt_stack { - ($name:ident, $stack: ident, $func:block) => { - #[naked] - pub unsafe extern fn $name () { - #[inline(never)] - unsafe fn inner($stack: &mut $crate::arch::x86_64::macros::InterruptStack) { - $func - } - - // Push scratch registers - scratch_push!(); - preserved_push!(); - fs_push!(); - - // Get reference to stack variables - let rsp: usize; - asm!("" : "={rsp}"(rsp) : : : "intel", "volatile"); - - // Map kernel - $crate::arch::x86_64::pti::map(); - - // Call inner rust function - inner(&mut *(rsp as *mut $crate::arch::x86_64::macros::InterruptStack)); - - // Unmap kernel - $crate::arch::x86_64::pti::unmap(); - - // Pop scratch registers and return - fs_pop!(); - preserved_pop!(); - scratch_pop!(); - iret!(); - } - }; -} - -#[allow(dead_code)] -#[repr(packed)] -pub struct InterruptErrorStack { - pub fs: usize, - pub preserved: PreservedRegisters, - pub scratch: ScratchRegisters, - pub code: usize, - pub iret: IretRegisters, -} - -impl InterruptErrorStack { - pub fn dump(&self) { - self.iret.dump(); - println!("CODE: {:>016X}", { self.code }); - self.scratch.dump(); - self.preserved.dump(); - println!("FS: {:>016X}", { self.fs }); - } -} - -#[macro_export] -macro_rules! interrupt_error { - ($name:ident, $stack:ident, $func:block) => { - #[naked] - pub unsafe extern fn $name () { - #[inline(never)] - unsafe fn inner($stack: &$crate::arch::x86_64::macros::InterruptErrorStack) { - $func - } - - // Push scratch registers - scratch_push!(); - preserved_push!(); - fs_push!(); - - // Get reference to stack variables - let rsp: usize; - asm!("" : "={rsp}"(rsp) : : : "intel", "volatile"); - - // Map kernel - $crate::arch::x86_64::pti::map(); - - // Call inner rust function - inner(&*(rsp as *const $crate::arch::x86_64::macros::InterruptErrorStack)); - - // Unmap kernel - $crate::arch::x86_64::pti::unmap(); - - // Pop scratch registers, error code, and return - fs_pop!(); - preserved_pop!(); - scratch_pop!(); - asm!("add rsp, 8" : : : : "intel", "volatile"); - iret!(); - } - }; -} +); +macro_rules! alternative_auto( + ($first_digit:literal, $feature:literal, [$($then:expr),*]) => { concat!( + ".if ", expand_bool!(cfg!(cpu_feature_auto = $feature)), " + .pushsection .altcode.", $feature, ",\"a\" + ", $first_digit, "0: + ", $($then,)* " + ", $first_digit, "1: + .popsection + .pushsection .altfeatures.", $feature, ",\"a\" + 70: .ascii \"", $feature, "\" + 71: + .popsection + .pushsection .altrelocs.", $feature, ",\"a\" + .quad 70b + .quad 71b - 70b + .quad 40b + .quad 42b - 40b + .quad 41b - 40b + .quad 0 + .quad ", $first_digit, "0b + .quad ", $first_digit, "1b - ", $first_digit, "0b + .popsection + .endif + ", + ) } +); diff --git a/src/arch/x86_64/misc.rs b/src/arch/x86_64/misc.rs new file mode 100644 index 00000000..8659101c --- /dev/null +++ b/src/arch/x86_64/misc.rs @@ -0,0 +1,27 @@ +use x86::controlregs::Cr4; + +use crate::{ + cpu_set::LogicalCpuId, + cpuid::{cpuid, has_ext_feat}, +}; + +pub unsafe fn init(cpu_id: LogicalCpuId) { + if has_ext_feat(|feat| feat.has_umip()) { + // UMIP (UserMode Instruction Prevention) forbids userspace from calling SGDT, SIDT, SLDT, + // SMSW and STR. KASLR is currently not implemented, but this protects against leaking + // addresses. + x86::controlregs::cr4_write(x86::controlregs::cr4() | Cr4::CR4_ENABLE_UMIP); + } + if has_ext_feat(|feat| feat.has_smep()) { + // SMEP (Supervisor-Mode Execution Prevention) forbids the kernel from executing + // instruction on any page marked "userspace-accessible". 
This improves security for + // obvious reasons. + x86::controlregs::cr4_write(x86::controlregs::cr4() | Cr4::CR4_ENABLE_SMEP); + } + + if let Some(feats) = cpuid().get_extended_processor_and_feature_identifiers() + && feats.has_rdtscp() + { + x86::msr::wrmsr(x86::msr::IA32_TSC_AUX, cpu_id.get().into()); + } +} diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 715bcad6..59823f4c 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,36 +1,67 @@ +pub use crate::arch::x86_shared::*; + +pub mod alternative; + #[macro_use] pub mod macros; -/// Debugging support -pub mod debug; +/// Constants like memory locations +pub mod consts; -/// Devices -pub mod device; +/// CPUID wrapper +pub mod cpuid; /// Global descriptor table pub mod gdt; -/// Graphical debug -#[cfg(feature = "graphical_debug")] -mod graphical_debug; - -/// Interrupt descriptor table -pub mod idt; - /// Interrupt instructions +#[macro_use] pub mod interrupt; -/// Inter-processor interrupts -pub mod ipi; +/// Miscellaneous processor features +pub mod misc; /// Paging pub mod paging; -/// Page table isolation -pub mod pti; +pub mod rmm; /// Initialization and start function pub mod start; -/// Stop function -pub mod stop; +pub use ::rmm::X8664Arch as CurrentRmmArch; + +// Flags +pub mod flags { + pub const SHIFT_SINGLESTEP: usize = 8; + pub const FLAG_SINGLESTEP: usize = 1 << SHIFT_SINGLESTEP; +} + +// TODO: Maybe support rewriting relocations (using LD's --emit-relocs) when working with entire +// functions? +#[naked] +#[link_section = ".usercopy-fns"] +pub unsafe extern "C" fn arch_copy_to_user(dst: usize, src: usize, len: usize) -> u8 { + // TODO: spectre_v1 + + core::arch::naked_asm!(alternative!( + feature: "smap", + then: [" + xor eax, eax + mov rcx, rdx + stac + rep movsb + clac + ret + "], + default: [" + xor eax, eax + mov rcx, rdx + rep movsb + ret + "] + )); +} +pub use arch_copy_to_user as arch_copy_from_user; + +pub use alternative::kfx_size; diff --git a/src/arch/x86_64/paging/entry.rs b/src/arch/x86_64/paging/entry.rs deleted file mode 100644 index 3f59f0e1..00000000 --- a/src/arch/x86_64/paging/entry.rs +++ /dev/null @@ -1,78 +0,0 @@ -//! # Page table entry -//! Some code borrowed from [Phil Opp's Blog](http://os.phil-opp.com/modifying-page-tables.html) - -use crate::memory::Frame; - -use super::PhysicalAddress; - -/// A page table entry -pub struct Entry(u64); - -bitflags! { - pub struct EntryFlags: u64 { - const PRESENT = 1; - const WRITABLE = 1 << 1; - const USER_ACCESSIBLE = 1 << 2; - const WRITE_THROUGH = 1 << 3; - const NO_CACHE = 1 << 4; - const ACCESSED = 1 << 5; - const DIRTY = 1 << 6; - const HUGE_PAGE = 1 << 7; - const GLOBAL = 1 << 8; - const NO_EXECUTE = 1 << 63; - } -} - -pub const ADDRESS_MASK: usize = 0x000f_ffff_ffff_f000; -pub const COUNTER_MASK: u64 = 0x3ff0_0000_0000_0000; - -impl Entry { - /// Clear entry - pub fn set_zero(&mut self) { - self.0 = 0; - } - - /// Is the entry unused? 
- pub fn is_unused(&self) -> bool { - self.0 == (self.0 & COUNTER_MASK) - } - - /// Make the entry unused - pub fn set_unused(&mut self) { - self.0 &= COUNTER_MASK; - } - - /// Get the address this page references - pub fn address(&self) -> PhysicalAddress { - PhysicalAddress::new(self.0 as usize & ADDRESS_MASK) - } - - /// Get the current entry flags - pub fn flags(&self) -> EntryFlags { - EntryFlags::from_bits_truncate(self.0) - } - - /// Get the associated frame, if available - pub fn pointed_frame(&self) -> Option { - if self.flags().contains(EntryFlags::PRESENT) { - Some(Frame::containing_address(self.address())) - } else { - None - } - } - - pub fn set(&mut self, frame: Frame, flags: EntryFlags) { - debug_assert!(frame.start_address().get() & !ADDRESS_MASK == 0); - self.0 = (frame.start_address().get() as u64) | flags.bits() | (self.0 & COUNTER_MASK); - } - - /// Get bits 52-61 in entry, used as counter for page table - pub fn counter_bits(&self) -> u64 { - (self.0 & COUNTER_MASK) >> 52 - } - - /// Set bits 52-61 in entry, used as counter for page table - pub fn set_counter_bits(&mut self, count: u64) { - self.0 = (self.0 & !COUNTER_MASK) | (count << 52); - } -} diff --git a/src/arch/x86_64/paging/mapper.rs b/src/arch/x86_64/paging/mapper.rs index f8020678..73dd4893 100644 --- a/src/arch/x86_64/paging/mapper.rs +++ b/src/arch/x86_64/paging/mapper.rs @@ -1,237 +1,22 @@ -use core::mem; -use core::ptr::Unique; +use crate::ipi::{ipi, IpiKind, IpiTarget}; -use crate::memory::{allocate_frames, deallocate_frames, Frame}; +use super::RmmA; -use super::{ActivePageTable, Page, PAGE_SIZE, PhysicalAddress, VirtualAddress}; -use super::entry::EntryFlags; -use super::table::{self, Table, Level4}; +pub use rmm::{Flusher, PageFlush, PageFlushAll}; -/// In order to enforce correct paging operations in the kernel, these types -/// are returned on any mapping operation to get the code involved to specify -/// how it intends to flush changes to a page table -#[must_use = "The page table must be flushed, or the changes unsafely ignored"] -pub struct MapperFlush(Page); - -impl MapperFlush { - /// Create a new page flush promise - pub fn new(page: Page) -> MapperFlush { - MapperFlush(page) - } - - /// Flush this page in the active table - pub fn flush(self, table: &mut ActivePageTable) { - table.flush(self.0); - mem::forget(self); - } - - /// Ignore the flush. This is unsafe, and a reason should be provided for use - pub unsafe fn ignore(self) { - mem::forget(self); - } -} - -/// A flush cannot be dropped, it must be consumed -impl Drop for MapperFlush { - fn drop(&mut self) { - panic!("Mapper flush was not utilized"); - } +pub struct InactiveFlusher { + _inner: (), } - -/// To allow for combining multiple flushes into one, we have a way of flushing -/// the active table, which can consume `MapperFlush` structs -#[must_use = "The page table must be flushed, or the changes unsafely ignored"] -pub struct MapperFlushAll(bool); - -impl MapperFlushAll { - /// Create a new promise to flush all mappings - pub fn new() -> MapperFlushAll { - MapperFlushAll(false) - } - - /// Consume a single page flush - pub fn consume(&mut self, flush: MapperFlush) { - self.0 = true; - mem::forget(flush); - } - - /// Flush the active page table - pub fn flush(self, table: &mut ActivePageTable) { - if self.0 { - table.flush_all(); +impl Flusher for InactiveFlusher { + fn consume(&mut self, flush: PageFlush) { + // TODO: Push to TLB "mailbox" or tell it to reload CR3 if there are too many entries. 
+ unsafe { + flush.ignore(); } - mem::forget(self); - } - - /// Ignore the flush. This is unsafe, and a reason should be provided for use - pub unsafe fn ignore(self) { - mem::forget(self); } } - -/// A flush cannot be dropped, it must be consumed -impl Drop for MapperFlushAll { +impl Drop for InactiveFlusher { fn drop(&mut self) { - panic!("Mapper flush all was not utilized"); - } -} - -pub struct Mapper { - p4: Unique>, -} - -impl Mapper { - /// Create a new page table - pub unsafe fn new() -> Mapper { - Mapper { - p4: Unique::new_unchecked(table::P4), - } - } - - pub fn p4(&self) -> &Table { - unsafe { self.p4.as_ref() } - } - - pub fn p4_mut(&mut self) -> &mut Table { - unsafe { self.p4.as_mut() } - } - - /// Map a page to a frame - pub fn map_to(&mut self, page: Page, frame: Frame, flags: EntryFlags) -> MapperFlush { - let p3 = self.p4_mut().next_table_create(page.p4_index()); - let p2 = p3.next_table_create(page.p3_index()); - let p1 = p2.next_table_create(page.p2_index()); - - assert!(p1[page.p1_index()].is_unused(), - "{:X}: Set to {:X}: {:?}, requesting {:X}: {:?}", - page.start_address().get(), - p1[page.p1_index()].address().get(), p1[page.p1_index()].flags(), - frame.start_address().get(), flags); - p1.increment_entry_count(); - p1[page.p1_index()].set(frame, flags | EntryFlags::PRESENT); - MapperFlush::new(page) - } - - /// Map a page to the next free frame - pub fn map(&mut self, page: Page, flags: EntryFlags) -> MapperFlush { - let frame = allocate_frames(1).expect("out of frames"); - self.map_to(page, frame, flags) - } - - /// Update flags for a page - pub fn remap(&mut self, page: Page, flags: EntryFlags) -> MapperFlush { - let p3 = self.p4_mut().next_table_mut(page.p4_index()).expect("failed to remap: no p3"); - let p2 = p3.next_table_mut(page.p3_index()).expect("failed to remap: no p2"); - let p1 = p2.next_table_mut(page.p2_index()).expect("failed to remap: no p1"); - let frame = p1[page.p1_index()].pointed_frame().expect("failed to remap: not mapped"); - p1[page.p1_index()].set(frame, flags | EntryFlags::PRESENT); - MapperFlush::new(page) - } - - /// Identity map a frame - pub fn identity_map(&mut self, frame: Frame, flags: EntryFlags) -> MapperFlush { - let page = Page::containing_address(VirtualAddress::new(frame.start_address().get())); - self.map_to(page, frame, flags) - } - - fn unmap_inner(&mut self, page: &Page, keep_parents: bool) -> Frame { - let frame; - - let p4 = self.p4_mut(); - if let Some(p3) = p4.next_table_mut(page.p4_index()) { - if let Some(p2) = p3.next_table_mut(page.p3_index()) { - if let Some(p1) = p2.next_table_mut(page.p2_index()) { - frame = if let Some(frame) = p1[page.p1_index()].pointed_frame() { - frame - } else { - panic!("unmap_inner({:X}): frame not found", page.start_address().get()) - }; - - p1.decrement_entry_count(); - p1[page.p1_index()].set_unused(); - - if keep_parents || ! p1.is_unused() { - return frame; - } - } else { - panic!("unmap_inner({:X}): p1 not found", page.start_address().get()); - } - - if let Some(p1_frame) = p2[page.p2_index()].pointed_frame() { - //println!("Free p1 {:?}", p1_frame); - p2.decrement_entry_count(); - p2[page.p2_index()].set_unused(); - deallocate_frames(p1_frame, 1); - } else { - panic!("unmap_inner({:X}): p1_frame not found", page.start_address().get()); - } - - if ! 
p2.is_unused() { - return frame; - } - } else { - panic!("unmap_inner({:X}): p2 not found", page.start_address().get()); - } - - if let Some(p2_frame) = p3[page.p3_index()].pointed_frame() { - //println!("Free p2 {:?}", p2_frame); - p3.decrement_entry_count(); - p3[page.p3_index()].set_unused(); - deallocate_frames(p2_frame, 1); - } else { - panic!("unmap_inner({:X}): p2_frame not found", page.start_address().get()); - } - - if ! p3.is_unused() { - return frame; - } - } else { - panic!("unmap_inner({:X}): p3 not found", page.start_address().get()); - } - - if let Some(p3_frame) = p4[page.p4_index()].pointed_frame() { - //println!("Free p3 {:?}", p3_frame); - p4.decrement_entry_count(); - p4[page.p4_index()].set_unused(); - deallocate_frames(p3_frame, 1); - } else { - panic!("unmap_inner({:X}): p3_frame not found", page.start_address().get()); - } - - frame - } - - /// Unmap a page - pub fn unmap(&mut self, page: Page) -> MapperFlush { - let frame = self.unmap_inner(&page, false); - deallocate_frames(frame, 1); - MapperFlush::new(page) - } - - /// Unmap a page, return frame without free - pub fn unmap_return(&mut self, page: Page, keep_parents: bool) -> (MapperFlush, Frame) { - let frame = self.unmap_inner(&page, keep_parents); - (MapperFlush::new(page), frame) - } - - pub fn translate_page(&self, page: Page) -> Option { - self.p4().next_table(page.p4_index()) - .and_then(|p3| p3.next_table(page.p3_index())) - .and_then(|p2| p2.next_table(page.p2_index())) - .and_then(|p1| p1[page.p1_index()].pointed_frame()) - } - - pub fn translate_page_flags(&self, page: Page) -> Option { - self.p4().next_table(page.p4_index()) - .and_then(|p3| p3.next_table(page.p3_index())) - .and_then(|p2| p2.next_table(page.p2_index())) - .and_then(|p1| Some(p1[page.p1_index()].flags())) - } - - /// Translate a virtual address to a physical one - pub fn translate(&self, virtual_address: VirtualAddress) -> Option { - let offset = virtual_address.get() % PAGE_SIZE; - self.translate_page(Page::containing_address(virtual_address)) - .map(|frame| PhysicalAddress::new(frame.start_address().get() + offset)) + ipi(IpiKind::Tlb, IpiTarget::Other); } } diff --git a/src/arch/x86_64/paging/mod.rs b/src/arch/x86_64/paging/mod.rs index 84880b12..e892cb55 100644 --- a/src/arch/x86_64/paging/mod.rs +++ b/src/arch/x86_64/paging/mod.rs @@ -1,28 +1,36 @@ //! # Paging //! Some code was borrowed from [Phil Opp's Blog](http://os.phil-opp.com/modifying-page-tables.html) -use core::{mem, ptr}; -use core::ops::{Deref, DerefMut}; -use x86::shared::{control_regs, msr, tlb}; +use core::fmt::Debug; -use crate::memory::{allocate_frames, Frame}; +use x86::msr; -use self::entry::EntryFlags; -use self::mapper::Mapper; -use self::temporary_page::TemporaryPage; +pub use super::CurrentRmmArch as RmmA; +pub use rmm::{Arch as RmmArch, PageFlags, PhysicalAddress, TableKind, VirtualAddress}; + +pub type PageMapper = rmm::PageMapper; + +pub mod entry { + bitflags! 
{ + pub struct EntryFlags: usize { + const NO_CACHE = 1 << 4; + const HUGE_PAGE = 1 << 7; + const GLOBAL = 1 << 8; + const DEV_MEM = 0; + } + } +} -pub mod entry; pub mod mapper; -pub mod table; -pub mod temporary_page; -/// Number of entries per page table -pub const ENTRY_COUNT: usize = 512; +pub const ENTRY_COUNT: usize = RmmA::PAGE_ENTRIES; /// Size of pages -pub const PAGE_SIZE: usize = 4096; +pub const PAGE_SIZE: usize = RmmA::PAGE_SIZE; +pub const PAGE_MASK: usize = RmmA::PAGE_OFFSET_MASK; /// Setup page attribute table +#[cold] unsafe fn init_pat() { let uncacheable = 0; let write_combining = 1; @@ -41,393 +49,69 @@ unsafe fn init_pat() { let pat6 = pat2; let pat7 = pat3; - msr::wrmsr(msr::IA32_PAT, pat7 << 56 | pat6 << 48 | pat5 << 40 | pat4 << 32 - | pat3 << 24 | pat2 << 16 | pat1 << 8 | pat0); -} - -/// Copy tdata, clear tbss, set TCB self pointer -unsafe fn init_tcb(cpu_id: usize) -> usize { - extern { - /// The starting byte of the thread data segment - static mut __tdata_start: u8; - /// The ending byte of the thread data segment - static mut __tdata_end: u8; - /// The starting byte of the thread BSS segment - static mut __tbss_start: u8; - /// The ending byte of the thread BSS segment - static mut __tbss_end: u8; - } - - let tcb_offset; - { - let size = & __tbss_end as *const _ as usize - & __tdata_start as *const _ as usize; - let tbss_offset = & __tbss_start as *const _ as usize - & __tdata_start as *const _ as usize; - - let start = crate::KERNEL_PERCPU_OFFSET + crate::KERNEL_PERCPU_SIZE * cpu_id; - let end = start + size; - tcb_offset = end - mem::size_of::(); - - ptr::copy(& __tdata_start as *const u8, start as *mut u8, tbss_offset); - ptr::write_bytes((start + tbss_offset) as *mut u8, 0, size - tbss_offset); - - *(tcb_offset as *mut usize) = end; - } - tcb_offset -} - -/// Initialize paging -/// -/// Returns page table and thread control block offset -pub unsafe fn init(cpu_id: usize, kernel_start: usize, kernel_end: usize, stack_start: usize, stack_end: usize) -> (ActivePageTable, usize) { - extern { - /// The starting byte of the text (code) data segment. - static mut __text_start: u8; - /// The ending byte of the text (code) data segment. - static mut __text_end: u8; - /// The starting byte of the _.rodata_ (read-only data) segment. - static mut __rodata_start: u8; - /// The ending byte of the _.rodata_ (read-only data) segment. - static mut __rodata_end: u8; - /// The starting byte of the _.data_ segment. - static mut __data_start: u8; - /// The ending byte of the _.data_ segment. - static mut __data_end: u8; - /// The starting byte of the thread data segment - static mut __tdata_start: u8; - /// The ending byte of the thread data segment - static mut __tdata_end: u8; - /// The starting byte of the thread BSS segment - static mut __tbss_start: u8; - /// The ending byte of the thread BSS segment - static mut __tbss_end: u8; - /// The starting byte of the _.bss_ (uninitialized data) segment. - static mut __bss_start: u8; - /// The ending byte of the _.bss_ (uninitialized data) segment. 
- static mut __bss_end: u8; - } - - init_pat(); - - let mut active_table = ActivePageTable::new(); - - let mut temporary_page = TemporaryPage::new(Page::containing_address(VirtualAddress::new(crate::USER_TMP_MISC_OFFSET))); - - let mut new_table = { - let frame = allocate_frames(1).expect("no more frames in paging::init new_table"); - InactivePageTable::new(frame, &mut active_table, &mut temporary_page) - }; - - active_table.with(&mut new_table, &mut temporary_page, |mapper| { - // Remap stack writable, no execute - { - let start_frame = Frame::containing_address(PhysicalAddress::new(stack_start - crate::KERNEL_OFFSET)); - let end_frame = Frame::containing_address(PhysicalAddress::new(stack_end - crate::KERNEL_OFFSET - 1)); - for frame in Frame::range_inclusive(start_frame, end_frame) { - let page = Page::containing_address(VirtualAddress::new(frame.start_address().get() + crate::KERNEL_OFFSET)); - let result = mapper.map_to(page, frame, EntryFlags::PRESENT | EntryFlags::GLOBAL | EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE); - // The flush can be ignored as this is not the active table. See later active_table.switch - /* unsafe */ { result.ignore(); } - } - } - - // Map all frames in kernel - { - let start_frame = Frame::containing_address(PhysicalAddress::new(kernel_start)); - let end_frame = Frame::containing_address(PhysicalAddress::new(kernel_end - 1)); - for frame in Frame::range_inclusive(start_frame, end_frame) { - let phys_addr = frame.start_address().get(); - let virt_addr = phys_addr + crate::KERNEL_OFFSET; - - macro_rules! in_section { - ($n: ident) => ( - virt_addr >= & concat_idents!(__, $n, _start) as *const u8 as usize && - virt_addr < & concat_idents!(__, $n, _end) as *const u8 as usize - ); - } - - let flags = if in_section!(text) { - // Remap text read-only - EntryFlags::PRESENT | EntryFlags::GLOBAL - } else if in_section!(rodata) { - // Remap rodata read-only, no execute - EntryFlags::PRESENT | EntryFlags::GLOBAL | EntryFlags::NO_EXECUTE - } else if in_section!(data) { - // Remap data writable, no execute - EntryFlags::PRESENT | EntryFlags::GLOBAL | EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE - } else if in_section!(tdata) { - // Remap tdata master read-only, no execute - EntryFlags::PRESENT | EntryFlags::GLOBAL | EntryFlags::NO_EXECUTE - } else if in_section!(bss) { - // Remap bss writable, no execute - EntryFlags::PRESENT | EntryFlags::GLOBAL | EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE - } else { - // Remap anything else read-only, no execute - EntryFlags::PRESENT | EntryFlags::GLOBAL | EntryFlags::NO_EXECUTE - }; - - let page = Page::containing_address(VirtualAddress::new(virt_addr)); - let result = mapper.map_to(page, frame, flags); - // The flush can be ignored as this is not the active table. See later active_table.switch - /* unsafe */ { result.ignore(); } - } - } - - // Map tdata and tbss - { - let size = & __tbss_end as *const _ as usize - & __tdata_start as *const _ as usize; - - let start = crate::KERNEL_PERCPU_OFFSET + crate::KERNEL_PERCPU_SIZE * cpu_id; - let end = start + size; - - let start_page = Page::containing_address(VirtualAddress::new(start)); - let end_page = Page::containing_address(VirtualAddress::new(end - 1)); - for page in Page::range_inclusive(start_page, end_page) { - let result = mapper.map(page, EntryFlags::PRESENT | EntryFlags::GLOBAL | EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE); - // The flush can be ignored as this is not the active table. 
See later active_table.switch - result.ignore(); - } - } - }); - - // This switches the active table, which is setup by the bootloader, to a correct table - // setup by the lambda above. This will also flush the TLB - active_table.switch(new_table); - - (active_table, init_tcb(cpu_id)) -} - -pub unsafe fn init_ap(cpu_id: usize, bsp_table: usize, stack_start: usize, stack_end: usize) -> usize { - extern { - /// The starting byte of the thread data segment - static mut __tdata_start: u8; - /// The ending byte of the thread data segment - static mut __tdata_end: u8; - /// The starting byte of the thread BSS segment - static mut __tbss_start: u8; - /// The ending byte of the thread BSS segment - static mut __tbss_end: u8; - } - + msr::wrmsr( + msr::IA32_PAT, + pat7 << 56 + | pat6 << 48 + | pat5 << 40 + | pat4 << 32 + | pat3 << 24 + | pat2 << 16 + | pat1 << 8 + | pat0, + ); +} + +/// Initialize PAT +#[cold] +pub unsafe fn init() { init_pat(); - - let mut active_table = ActivePageTable::new(); - - let mut new_table = InactivePageTable::from_address(bsp_table); - - let mut temporary_page = TemporaryPage::new(Page::containing_address(VirtualAddress::new(crate::USER_TMP_MISC_OFFSET))); - - active_table.with(&mut new_table, &mut temporary_page, |mapper| { - // Map tdata and tbss - { - let size = & __tbss_end as *const _ as usize - & __tdata_start as *const _ as usize; - - let start = crate::KERNEL_PERCPU_OFFSET + crate::KERNEL_PERCPU_SIZE * cpu_id; - let end = start + size; - - let start_page = Page::containing_address(VirtualAddress::new(start)); - let end_page = Page::containing_address(VirtualAddress::new(end - 1)); - for page in Page::range_inclusive(start_page, end_page) { - let result = mapper.map(page, EntryFlags::PRESENT | EntryFlags::GLOBAL | EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE); - // The flush can be ignored as this is not the active table. See later active_table.switch - result.ignore(); - } - } - - let mut remap = |start: usize, end: usize, flags: EntryFlags| { - if end > start { - let start_frame = Frame::containing_address(PhysicalAddress::new(start)); - let end_frame = Frame::containing_address(PhysicalAddress::new(end - 1)); - for frame in Frame::range_inclusive(start_frame, end_frame) { - let page = Page::containing_address(VirtualAddress::new(frame.start_address().get() + crate::KERNEL_OFFSET)); - let result = mapper.map_to(page, frame, flags); - // The flush can be ignored as this is not the active table. See later active_table.switch - result.ignore(); - } - } - }; - - // Remap stack writable, no execute - remap(stack_start - crate::KERNEL_OFFSET, stack_end - crate::KERNEL_OFFSET, EntryFlags::PRESENT | EntryFlags::GLOBAL | EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE); - }); - - // This switches the active table, which is setup by the bootloader, to a correct table - // setup by the lambda above. 
This will also flush the TLB - active_table.switch(new_table); - - init_tcb(cpu_id) -} - -pub struct ActivePageTable { - mapper: Mapper, -} - -impl Deref for ActivePageTable { - type Target = Mapper; - - fn deref(&self) -> &Mapper { - &self.mapper - } -} - -impl DerefMut for ActivePageTable { - fn deref_mut(&mut self) -> &mut Mapper { - &mut self.mapper - } -} - -impl ActivePageTable { - pub unsafe fn new() -> ActivePageTable { - ActivePageTable { - mapper: Mapper::new(), - } - } - - pub fn switch(&mut self, new_table: InactivePageTable) -> InactivePageTable { - let old_table = InactivePageTable { - p4_frame: Frame::containing_address( - PhysicalAddress::new(unsafe { control_regs::cr3() } as usize) - ), - }; - unsafe { - control_regs::cr3_write(new_table.p4_frame.start_address().get() as u64); - } - old_table - } - - pub fn flush(&mut self, page: Page) { - unsafe { tlb::flush(page.start_address().get()); } - } - - pub fn flush_all(&mut self) { - unsafe { tlb::flush_all(); } - } - - pub fn with(&mut self, table: &mut InactivePageTable, temporary_page: &mut TemporaryPage, f: F) - where F: FnOnce(&mut Mapper) - { - { - let backup = Frame::containing_address(PhysicalAddress::new(unsafe { control_regs::cr3() as usize })); - - // map temporary_page to current p4 table - let p4_table = temporary_page.map_table_frame(backup.clone(), EntryFlags::PRESENT | EntryFlags::WRITABLE | EntryFlags::NO_EXECUTE, self); - - // overwrite recursive mapping - self.p4_mut()[crate::RECURSIVE_PAGE_PML4].set(table.p4_frame.clone(), EntryFlags::PRESENT | EntryFlags::WRITABLE | EntryFlags::NO_EXECUTE); - self.flush_all(); - - // execute f in the new context - f(self); - - // restore recursive mapping to original p4 table - p4_table[crate::RECURSIVE_PAGE_PML4].set(backup, EntryFlags::PRESENT | EntryFlags::WRITABLE | EntryFlags::NO_EXECUTE); - self.flush_all(); - } - - temporary_page.unmap(self); - } - - pub unsafe fn address(&self) -> usize { - control_regs::cr3() as usize - } -} - -pub struct InactivePageTable { - p4_frame: Frame, -} - -impl InactivePageTable { - pub fn new(frame: Frame, active_table: &mut ActivePageTable, temporary_page: &mut TemporaryPage) -> InactivePageTable { - { - let table = temporary_page.map_table_frame(frame.clone(), EntryFlags::PRESENT | EntryFlags::WRITABLE | EntryFlags::NO_EXECUTE, active_table); - // now we are able to zero the table - table.zero(); - // set up recursive mapping for the table - table[crate::RECURSIVE_PAGE_PML4].set(frame.clone(), EntryFlags::PRESENT | EntryFlags::WRITABLE | EntryFlags::NO_EXECUTE); - } - temporary_page.unmap(active_table); - - InactivePageTable { p4_frame: frame } - } - - pub unsafe fn from_address(cr3: usize) -> InactivePageTable { - InactivePageTable { p4_frame: Frame::containing_address(PhysicalAddress::new(cr3)) } - } - - pub unsafe fn address(&self) -> usize { - self.p4_frame.start_address().get() - } -} - -/// A physical address. -#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub struct PhysicalAddress(usize); - -impl PhysicalAddress { - pub fn new(address: usize) -> Self { - PhysicalAddress(address) - } - - pub fn get(&self) -> usize { - self.0 - } -} - -/// A virtual address. 
-#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub struct VirtualAddress(usize); - -impl VirtualAddress { - pub fn new(address: usize) -> Self { - VirtualAddress(address) - } - - pub fn get(&self) -> usize { - self.0 - } } /// Page -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct Page { - number: usize + number: usize, } impl Page { - pub fn start_address(&self) -> VirtualAddress { + pub fn start_address(self) -> VirtualAddress { VirtualAddress::new(self.number * PAGE_SIZE) } - pub fn p4_index(&self) -> usize { - (self.number >> 27) & 0o777 - } - - pub fn p3_index(&self) -> usize { - (self.number >> 18) & 0o777 - } - - pub fn p2_index(&self) -> usize { - (self.number >> 9) & 0o777 - } - - pub fn p1_index(&self) -> usize { - self.number & 0o777 - } - pub fn containing_address(address: VirtualAddress) -> Page { - //TODO assert!(address.get() < 0x0000_8000_0000_0000 || address.get() >= 0xffff_8000_0000_0000, - // "invalid address: 0x{:x}", address.get()); - Page { number: address.get() / PAGE_SIZE } + //TODO assert!(address.data() < 0x0000_8000_0000_0000 || address.data() >= 0xffff_8000_0000_0000, + // "invalid address: 0x{:x}", address.data()); + Page { + number: address.data() / PAGE_SIZE, + } } - pub fn range_inclusive(start: Page, end: Page) -> PageIter { + pub fn range_inclusive(start: Page, r#final: Page) -> PageIter { PageIter { - start: start, - end: end, + start, + end: r#final.next(), } } - pub fn next(self) -> Page { - Self { number: self.number + 1 } + self.next_by(1) + } + pub fn next_by(self, n: usize) -> Page { + Self { + number: self.number + n, + } + } + pub fn offset_from(self, other: Self) -> usize { + self.number - other.number + } +} +impl Debug for Page { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!( + f, + "[page at {:p}]", + self.start_address().data() as *const u8 + ) } } @@ -440,7 +124,7 @@ impl Iterator for PageIter { type Item = Page; fn next(&mut self) -> Option { - if self.start <= self.end { + if self.start < self.end { let page = self.start; self.start = self.start.next(); Some(page) @@ -449,3 +133,12 @@ impl Iterator for PageIter { } } } + +/// Round down to the nearest multiple of page size +pub fn round_down_pages(number: usize) -> usize { + number.div_floor(PAGE_SIZE) * PAGE_SIZE +} +/// Round up to the nearest multiple of page size +pub fn round_up_pages(number: usize) -> usize { + number.next_multiple_of(PAGE_SIZE) +} diff --git a/src/arch/x86_64/paging/table.rs b/src/arch/x86_64/paging/table.rs deleted file mode 100644 index aac93234..00000000 --- a/src/arch/x86_64/paging/table.rs +++ /dev/null @@ -1,128 +0,0 @@ -//! # Page table -//! 
Code borrowed from [Phil Opp's Blog](http://os.phil-opp.com/modifying-page-tables.html) - -use core::marker::PhantomData; -use core::ops::{Index, IndexMut}; - -use crate::memory::allocate_frames; - -use super::entry::{EntryFlags, Entry}; -use super::ENTRY_COUNT; - -pub const P4: *mut Table = (crate::RECURSIVE_PAGE_OFFSET | 0x7ffffff000) as *mut _; - -pub trait TableLevel {} - -pub enum Level4 {} -pub enum Level3 {} -pub enum Level2 {} -pub enum Level1 {} - -impl TableLevel for Level4 {} -impl TableLevel for Level3 {} -impl TableLevel for Level2 {} -impl TableLevel for Level1 {} - -pub trait HierarchicalLevel: TableLevel { - type NextLevel: TableLevel; -} - -impl HierarchicalLevel for Level4 { - type NextLevel = Level3; -} - -impl HierarchicalLevel for Level3 { - type NextLevel = Level2; -} - -impl HierarchicalLevel for Level2 { - type NextLevel = Level1; -} - -pub struct Table { - entries: [Entry; ENTRY_COUNT], - level: PhantomData, -} - -impl Table where L: TableLevel { - pub fn is_unused(&self) -> bool { - if self.entry_count() > 0 { - return false; - } - - true - } - - pub fn zero(&mut self) { - for entry in self.entries.iter_mut() { - entry.set_zero(); - } - } - - /// Set number of entries in first table entry - fn set_entry_count(&mut self, count: u64) { - debug_assert!(count <= ENTRY_COUNT as u64, "count can't be greater than ENTRY_COUNT"); - self.entries[0].set_counter_bits(count); - } - - /// Get number of entries in first table entry - fn entry_count(&self) -> u64 { - self.entries[0].counter_bits() - } - - pub fn increment_entry_count(&mut self) { - let current_count = self.entry_count(); - self.set_entry_count(current_count + 1); - } - - pub fn decrement_entry_count(&mut self) { - let current_count = self.entry_count(); - self.set_entry_count(current_count - 1); - } -} - -impl Table where L: HierarchicalLevel { - pub fn next_table(&self, index: usize) -> Option<&Table> { - self.next_table_address(index).map(|address| unsafe { &*(address as *const _) }) - } - - pub fn next_table_mut(&mut self, index: usize) -> Option<&mut Table> { - self.next_table_address(index).map(|address| unsafe { &mut *(address as *mut _) }) - } - - pub fn next_table_create(&mut self, index: usize) -> &mut Table { - if self.next_table(index).is_none() { - assert!(!self[index].flags().contains(EntryFlags::HUGE_PAGE), - "next_table_create does not support huge pages"); - let frame = allocate_frames(1).expect("no frames available"); - self.increment_entry_count(); - self[index].set(frame, EntryFlags::PRESENT | EntryFlags::WRITABLE | EntryFlags::USER_ACCESSIBLE /* Allow users to go down the page table, implement permissions at the page level */); - self.next_table_mut(index).unwrap().zero(); - } - self.next_table_mut(index).unwrap() - } - - fn next_table_address(&self, index: usize) -> Option { - let entry_flags = self[index].flags(); - if entry_flags.contains(EntryFlags::PRESENT) && !entry_flags.contains(EntryFlags::HUGE_PAGE) { - let table_address = self as *const _ as usize; - Some((table_address << 9) | (index << 12)) - } else { - None - } - } -} - -impl Index for Table where L: TableLevel { - type Output = Entry; - - fn index(&self, index: usize) -> &Entry { - &self.entries[index] - } -} - -impl IndexMut for Table where L: TableLevel { - fn index_mut(&mut self, index: usize) -> &mut Entry { - &mut self.entries[index] - } -} diff --git a/src/arch/x86_64/paging/temporary_page.rs b/src/arch/x86_64/paging/temporary_page.rs deleted file mode 100644 index c50bbf23..00000000 --- 
a/src/arch/x86_64/paging/temporary_page.rs +++ /dev/null @@ -1,45 +0,0 @@ -//! Temporarily map a page -//! From [Phil Opp's Blog](http://os.phil-opp.com/remap-the-kernel.html) - -use crate::memory::Frame; - -use super::{ActivePageTable, Page, VirtualAddress}; -use super::entry::EntryFlags; -use super::table::{Table, Level1}; - -pub struct TemporaryPage { - page: Page, -} - -impl TemporaryPage { - pub fn new(page: Page) -> TemporaryPage { - TemporaryPage { - page: page, - } - } - - pub fn start_address (&self) -> VirtualAddress { - self.page.start_address() - } - - /// Maps the temporary page to the given frame in the active table. - /// Returns the start address of the temporary page. - pub fn map(&mut self, frame: Frame, flags: EntryFlags, active_table: &mut ActivePageTable) -> VirtualAddress { - assert!(active_table.translate_page(self.page).is_none(), "temporary page is already mapped"); - let result = active_table.map_to(self.page, frame, flags); - result.flush(active_table); - self.page.start_address() - } - - /// Maps the temporary page to the given page table frame in the active - /// table. Returns a reference to the now mapped table. - pub fn map_table_frame(&mut self, frame: Frame, flags: EntryFlags, active_table: &mut ActivePageTable) -> &mut Table { - unsafe { &mut *(self.map(frame, flags, active_table).get() as *mut Table) } - } - - /// Unmaps the temporary page in the active table. - pub fn unmap(&mut self, active_table: &mut ActivePageTable) { - let (result, _frame) = active_table.unmap_return(self.page, true); - result.flush(active_table); - } -} diff --git a/src/arch/x86_64/rmm.rs b/src/arch/x86_64/rmm.rs new file mode 100644 index 00000000..9d78a2f9 --- /dev/null +++ b/src/arch/x86_64/rmm.rs @@ -0,0 +1,18 @@ +use rmm::{Arch, PageFlags, VirtualAddress}; + +pub unsafe fn page_flags(virt: VirtualAddress) -> PageFlags { + use crate::kernel_executable_offsets::*; + let virt_addr = virt.data(); + + (if virt_addr >= __text_start() && virt_addr < __text_end() { + // Remap text read-only, execute + PageFlags::new().execute(true) + } else if virt_addr >= __rodata_start() && virt_addr < __rodata_end() { + // Remap rodata read-only, no execute + PageFlags::new() + } else { + // Remap everything else read-write, no execute + PageFlags::new().write(true) + }) + .global(cfg!(not(feature = "pti"))) +} diff --git a/src/arch/x86_64/start.rs b/src/arch/x86_64/start.rs index 2fb3066c..ec5ab0cb 100644 --- a/src/arch/x86_64/start.rs +++ b/src/arch/x86_64/start.rs @@ -2,42 +2,42 @@ /// It is increcibly unsafe, and should be minimal in nature /// It must create the IDT with the correct entries, those entries are /// defined in other files inside of the `arch` module - use core::slice; -use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use core::{ + cell::SyncUnsafeCell, + sync::atomic::{AtomicBool, AtomicU32, AtomicUsize, Ordering}, +}; + +use log::info; -use crate::allocator; #[cfg(feature = "acpi")] -use acpi; +use crate::acpi; + #[cfg(feature = "graphical_debug")] -use arch::x86_64::graphical_debug; -use crate::arch::x86_64::pti; -use crate::device; -use crate::gdt; -use crate::idt; -use crate::interrupt; -use crate::log; -use crate::memory; -use crate::paging; +use crate::devices::graphical_debug; +use crate::{ + allocator, + cpu_set::LogicalCpuId, + device, gdt, idt, interrupt, misc, + paging::{self, PhysicalAddress, RmmA, RmmArch, TableKind}, + startup::memory::{register_bootloader_areas, register_memory_region, BootloaderMemoryKind}, +}; /// Test of zero values in BSS. 
-static BSS_TEST_ZERO: usize = 0; +static BSS_TEST_ZERO: SyncUnsafeCell = SyncUnsafeCell::new(0); /// Test of non-zero values in data. -static DATA_TEST_NONZERO: usize = 0xFFFF_FFFF_FFFF_FFFF; -/// Test of zero values in thread BSS -#[thread_local] -static mut TBSS_TEST_ZERO: usize = 0; -/// Test of non-zero values in thread data. -#[thread_local] -static mut TDATA_TEST_NONZERO: usize = 0xFFFF_FFFF_FFFF_FFFF; +static DATA_TEST_NONZERO: SyncUnsafeCell = SyncUnsafeCell::new(usize::max_value()); pub static KERNEL_BASE: AtomicUsize = AtomicUsize::new(0); pub static KERNEL_SIZE: AtomicUsize = AtomicUsize::new(0); -pub static CPU_COUNT: AtomicUsize = AtomicUsize::new(0); + +// TODO: This probably shouldn't be an atomic. Only the BSP starts APs. +pub static CPU_COUNT: AtomicU32 = AtomicU32::new(0); + pub static AP_READY: AtomicBool = AtomicBool::new(false); static BSP_READY: AtomicBool = AtomicBool::new(false); -#[repr(packed)] +#[repr(C, packed(8))] pub struct KernelArgs { kernel_base: u64, kernel_size: u64, @@ -45,33 +45,100 @@ pub struct KernelArgs { stack_size: u64, env_base: u64, env_size: u64, + + /// The base pointer to the saved RSDP. + /// + /// This field can be NULL, and if so, the system has not booted with UEFI or in some other way + /// retrieved the RSDPs. The kernel or a userspace driver will thus try searching the BIOS + /// memory instead. On UEFI systems, searching is not guaranteed to actually work though. + acpi_rsdp_base: u64, + /// The size of the RSDP region. + acpi_rsdp_size: u64, + + areas_base: u64, + areas_size: u64, + + /// The physical base 64-bit pointer to the contiguous bootstrap/initfs. + bootstrap_base: u64, + /// Size of contiguous bootstrap/initfs physical region, not necessarily page aligned. + bootstrap_size: u64, } /// The entry to Rust, all things must be initialized #[no_mangle] -pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! { - let env = { - let args = &*args_ptr; - - let kernel_base = args.kernel_base as usize; - let kernel_size = args.kernel_size as usize; - let stack_base = args.stack_base as usize; - let stack_size = args.stack_size as usize; - let env_base = args.env_base as usize; - let env_size = args.env_size as usize; +pub unsafe extern "C" fn kstart(args_ptr: *const KernelArgs) -> ! 
{ + let bootstrap = { + let args = args_ptr.read(); // BSS should already be zero { - assert_eq!(BSS_TEST_ZERO, 0); - assert_eq!(DATA_TEST_NONZERO, 0xFFFF_FFFF_FFFF_FFFF); + assert_eq!(BSS_TEST_ZERO.get().read(), 0); + assert_eq!(DATA_TEST_NONZERO.get().read(), usize::max_value()); } - KERNEL_BASE.store(kernel_base, Ordering::SeqCst); - KERNEL_SIZE.store(kernel_size, Ordering::SeqCst); - - println!("Kernel: {:X}:{:X}", kernel_base, kernel_base + kernel_size); - println!("Stack: {:X}:{:X}", stack_base, stack_base + stack_size); - println!("Env: {:X}:{:X}", env_base, env_base + env_size); + KERNEL_BASE.store(args.kernel_base as usize, Ordering::SeqCst); + KERNEL_SIZE.store(args.kernel_size as usize, Ordering::SeqCst); + + // Convert env to slice + let env = slice::from_raw_parts( + (args.env_base as usize + crate::PHYS_OFFSET) as *const u8, + args.env_size as usize, + ); + + // Set up serial debug + #[cfg(feature = "serial_debug")] + device::serial::init(); + + // Set up graphical debug + #[cfg(feature = "graphical_debug")] + graphical_debug::init(env); + + #[cfg(feature = "system76_ec_debug")] + device::system76_ec::init(); + + // Initialize logger + crate::log::init_logger(|r| { + use core::fmt::Write; + let _ = writeln!( + super::debug::Writer::new(), + "{}:{} -- {}", + r.target(), + r.level(), + r.args() + ); + }); + + info!("Redox OS starting..."); + info!( + "Kernel: {:X}:{:X}", + { args.kernel_base }, + { args.kernel_base } + { args.kernel_size } + ); + info!( + "Stack: {:X}:{:X}", + { args.stack_base }, + { args.stack_base } + { args.stack_size } + ); + info!( + "Env: {:X}:{:X}", + { args.env_base }, + { args.env_base } + { args.env_size } + ); + info!( + "RSDP: {:X}:{:X}", + { args.acpi_rsdp_base }, + { args.acpi_rsdp_base } + { args.acpi_rsdp_size } + ); + info!( + "Areas: {:X}:{:X}", + { args.areas_base }, + { args.areas_base } + { args.areas_size } + ); + info!( + "Bootstrap: {:X}:{:X}", + { args.bootstrap_base }, + { args.bootstrap_base } + { args.bootstrap_size } + ); // Set up GDT before paging gdt::init(); @@ -79,90 +146,127 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! 
{ // Set up IDT before paging idt::init(); - // Initialize memory management - memory::init(0, kernel_base + ((kernel_size + 4095)/4096) * 4096); - - // Initialize paging - let (mut active_table, tcb_offset) = paging::init(0, kernel_base, kernel_base + kernel_size, stack_base, stack_base + stack_size); + // Initialize RMM + register_bootloader_areas(args.areas_base as usize, args.areas_size as usize); + register_memory_region( + args.kernel_base as usize, + args.kernel_size as usize, + BootloaderMemoryKind::Kernel, + ); + register_memory_region( + args.stack_base as usize, + args.stack_size as usize, + BootloaderMemoryKind::IdentityMap, + ); + register_memory_region( + args.env_base as usize, + args.env_size as usize, + BootloaderMemoryKind::IdentityMap, + ); + register_memory_region( + args.acpi_rsdp_base as usize, + args.acpi_rsdp_size as usize, + BootloaderMemoryKind::IdentityMap, + ); + register_memory_region( + args.bootstrap_base as usize, + args.bootstrap_size as usize, + BootloaderMemoryKind::IdentityMap, + ); + crate::startup::memory::init(Some(0x100000), None); + + // Initialize PAT + paging::init(); // Set up GDT after paging with TLS - gdt::init_paging(tcb_offset, stack_base + stack_size); + gdt::init_paging( + args.stack_base as usize + args.stack_size as usize, + LogicalCpuId::BSP, + ); // Set up IDT - idt::init_paging(); + idt::init_paging_bsp(); + + crate::alternative::early_init(true); // Set up syscall instruction interrupt::syscall::init(); - // Test tdata and tbss - { - assert_eq!(TBSS_TEST_ZERO, 0); - TBSS_TEST_ZERO += 1; - assert_eq!(TBSS_TEST_ZERO, 1); - assert_eq!(TDATA_TEST_NONZERO, 0xFFFF_FFFF_FFFF_FFFF); - TDATA_TEST_NONZERO -= 1; - assert_eq!(TDATA_TEST_NONZERO, 0xFFFF_FFFF_FFFF_FFFE); - } - // Reset AP variables CPU_COUNT.store(1, Ordering::SeqCst); AP_READY.store(false, Ordering::SeqCst); BSP_READY.store(false, Ordering::SeqCst); // Setup kernel heap - allocator::init(&mut active_table); + allocator::init(); + + #[cfg(feature = "profiling")] + crate::profiling::init(); + + // Set up double buffer for graphical debug now that heap is available + #[cfg(feature = "graphical_debug")] + graphical_debug::init_heap(); + + idt::init_paging_post_heap(LogicalCpuId::BSP); // Activate memory logging - log::init(); + crate::log::init(); - // Use graphical debug - #[cfg(feature="graphical_debug")] - graphical_debug::init(&mut active_table); + // Initialize miscellaneous processor features + misc::init(LogicalCpuId::BSP); // Initialize devices - device::init(&mut active_table); + device::init(); // Read ACPI tables, starts APs #[cfg(feature = "acpi")] - acpi::init(&mut active_table); + { + acpi::init(if args.acpi_rsdp_base != 0 { + Some((args.acpi_rsdp_base as usize + crate::PHYS_OFFSET) as *const u8) + } else { + None + }); + device::init_after_acpi(); + } // Initialize all of the non-core devices not otherwise needed to complete initialization device::init_noncore(); - // Initialize memory functions after core has loaded - memory::init_noncore(); - - // Stop graphical debug - #[cfg(feature="graphical_debug")] - graphical_debug::fini(&mut active_table); - BSP_READY.store(true, Ordering::SeqCst); - slice::from_raw_parts(env_base as *const u8, env_size) + crate::Bootstrap { + base: crate::memory::Frame::containing(crate::paging::PhysicalAddress::new( + args.bootstrap_base as usize, + )), + page_count: (args.bootstrap_size as usize) / crate::memory::PAGE_SIZE, + env, + } }; - crate::kmain(CPU_COUNT.load(Ordering::SeqCst), env); + crate::kmain(CPU_COUNT.load(Ordering::SeqCst), 
bootstrap); } -#[repr(packed)] +#[repr(C, packed)] pub struct KernelArgsAp { + // TODO: u32? cpu_id: u64, + page_table: u64, stack_start: u64, stack_end: u64, } /// Entry to rust for an AP -pub unsafe extern fn kstart_ap(args_ptr: *const KernelArgsAp) -> ! { +pub unsafe extern "C" fn kstart_ap(args_ptr: *const KernelArgsAp) -> ! { let cpu_id = { let args = &*args_ptr; - let cpu_id = args.cpu_id as usize; + let cpu_id = LogicalCpuId::new(args.cpu_id as u32); let bsp_table = args.page_table as usize; - let stack_start = args.stack_start as usize; + let _stack_start = args.stack_start as usize; let stack_end = args.stack_end as usize; - assert_eq!(BSS_TEST_ZERO, 0); - assert_eq!(DATA_TEST_NONZERO, 0xFFFF_FFFF_FFFF_FFFF); + assert_eq!(BSS_TEST_ZERO.get().read(), 0); + assert_eq!(DATA_TEST_NONZERO.get().read(), usize::max_value()); // Set up GDT before paging gdt::init(); @@ -171,26 +275,25 @@ pub unsafe extern fn kstart_ap(args_ptr: *const KernelArgsAp) -> ! { idt::init(); // Initialize paging - let tcb_offset = paging::init_ap(cpu_id, bsp_table, stack_start, stack_end); + RmmA::set_table(TableKind::Kernel, PhysicalAddress::new(bsp_table)); + paging::init(); // Set up GDT with TLS - gdt::init_paging(tcb_offset, stack_end); + gdt::init_paging(stack_end, cpu_id); + + #[cfg(feature = "profiling")] + crate::profiling::init(); // Set up IDT for AP - idt::init_paging(); + idt::init_paging_post_heap(cpu_id); + + crate::alternative::early_init(false); // Set up syscall instruction interrupt::syscall::init(); - // Test tdata and tbss - { - assert_eq!(TBSS_TEST_ZERO, 0); - TBSS_TEST_ZERO += 1; - assert_eq!(TBSS_TEST_ZERO, 1); - assert_eq!(TDATA_TEST_NONZERO, 0xFFFF_FFFF_FFFF_FFFF); - TDATA_TEST_NONZERO -= 1; - assert_eq!(TDATA_TEST_NONZERO, 0xFFFF_FFFF_FFFF_FFFE); - } + // Initialize miscellaneous processor features + misc::init(cpu_id); // Initialize devices (for AP) device::init_ap(); @@ -200,61 +303,9 @@ pub unsafe extern fn kstart_ap(args_ptr: *const KernelArgsAp) -> ! { cpu_id }; - while ! BSP_READY.load(Ordering::SeqCst) { + while !BSP_READY.load(Ordering::SeqCst) { interrupt::pause(); } crate::kmain_ap(cpu_id); } - -#[naked] -pub unsafe fn usermode(ip: usize, sp: usize, arg: usize) -> ! { - asm!("push r10 - push r11 - push r12 - push r13 - push r14 - push r15" - : // No output - : "{r10}"(gdt::GDT_USER_DATA << 3 | 3), // Data segment - "{r11}"(sp), // Stack pointer - "{r12}"(1 << 9), // Flags - Set interrupt enable flag - "{r13}"(gdt::GDT_USER_CODE << 3 | 3), // Code segment - "{r14}"(ip), // IP - "{r15}"(arg) // Argument - : // No clobbers - : "intel", "volatile"); - - // Unmap kernel - pti::unmap(); - - // Go to usermode - asm!("mov ds, r14d - mov es, r14d - mov fs, r15d - mov gs, r14d - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor rsi, rsi - xor rdi, rdi - xor rbp, rbp - xor r8, r8 - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - fninit - pop rdi - iretq" - : // No output because it never returns - : "{r14}"(gdt::GDT_USER_DATA << 3 | 3), // Data segment - "{r15}"(gdt::GDT_USER_TLS << 3 | 3) // TLS segment - : // No clobbers because it never returns - : "intel", "volatile"); - unreachable!(); -} diff --git a/src/arch/x86_64/stop.rs b/src/arch/x86_64/stop.rs deleted file mode 100644 index 0d4d206e..00000000 --- a/src/arch/x86_64/stop.rs +++ /dev/null @@ -1,52 +0,0 @@ -#[cfg(feature = "acpi")] -use acpi; -use crate::syscall::io::{Io, Pio}; - -#[no_mangle] -pub unsafe extern fn kreset() -> ! 
{ - println!("kreset"); - - // 8042 reset - { - println!("Reset with 8042"); - let mut port = Pio::::new(0x64); - while port.readf(2) {} - port.write(0xFE); - } - - // Use triple fault to guarantee reset - asm!("cli" : : : : "intel", "volatile"); - asm!("lidt cs:0" : : : : "intel", "volatile"); - asm!("int $$3" : : : : "intel", "volatile"); - - unreachable!(); -} - -#[no_mangle] -pub unsafe extern fn kstop() -> ! { - println!("kstop"); - - #[cfg(feature = "acpi")] - acpi::set_global_s_state(5); - - // Magic shutdown code for bochs and qemu (older versions). - for c in "Shutdown".bytes() { - let port = 0x8900; - println!("Shutdown with outb(0x{:X}, '{}')", port, c as char); - Pio::::new(port).write(c); - } - - // Magic shutdown using qemu default ACPI method - { - let port = 0x604; - let data = 0x2000; - println!("Shutdown with outb(0x{:X}, 0x{:X})", port, data); - Pio::::new(port).write(data); - } - - // Magic code for VMWare. Also a hard lock. - println!("Shutdown with cli hlt"); - loop { - asm!("cli; hlt" : : : : "intel", "volatile"); - } -} diff --git a/src/arch/x86_shared/cpuid.rs b/src/arch/x86_shared/cpuid.rs new file mode 100644 index 00000000..25b28598 --- /dev/null +++ b/src/arch/x86_shared/cpuid.rs @@ -0,0 +1,17 @@ +use raw_cpuid::{CpuId, CpuIdResult}; + +pub fn cpuid() -> CpuId { + // FIXME check for cpuid availability during early boot and error out if it doesn't exist. + CpuId::with_cpuid_fn(|a, c| { + #[cfg(target_arch = "x86")] + let result = unsafe { core::arch::x86::__cpuid_count(a, c) }; + #[cfg(target_arch = "x86_64")] + let result = unsafe { core::arch::x86_64::__cpuid_count(a, c) }; + CpuIdResult { + eax: result.eax, + ebx: result.ebx, + ecx: result.ecx, + edx: result.edx, + } + }) +} diff --git a/src/arch/x86_64/debug.rs b/src/arch/x86_shared/debug.rs similarity index 56% rename from src/arch/x86_64/debug.rs rename to src/arch/x86_shared/debug.rs index 3da2a663..576c22f5 100644 --- a/src/arch/x86_64/debug.rs +++ b/src/arch/x86_shared/debug.rs @@ -3,17 +3,24 @@ use core::fmt; use spin::Mutex; use spin::MutexGuard; -use crate::log::{LOG, Log}; +#[cfg(any(feature = "lpss_debug", feature = "serial_debug"))] +use crate::devices::uart_16550::SerialPort; +use crate::log::{Log, LOG}; +#[cfg(feature = "lpss_debug")] +use crate::syscall::io::Mmio; +#[cfg(any(feature = "qemu_debug", feature = "serial_debug"))] +use crate::syscall::io::Pio; #[cfg(feature = "qemu_debug")] use syscall::io::Io; -use crate::syscall::io::Pio; -#[cfg(feature = "serial_debug")] -use crate::devices::uart_16550::SerialPort; -#[cfg(feature = "graphical_debug")] -use super::graphical_debug::{DEBUG_DISPLAY, DebugDisplay}; #[cfg(feature = "serial_debug")] use super::device::serial::COM1; +#[cfg(feature = "lpss_debug")] +use super::device::serial::LPSS; +#[cfg(feature = "system76_ec_debug")] +use super::device::system76_ec::{System76Ec, SYSTEM76_EC}; +#[cfg(feature = "graphical_debug")] +use crate::devices::graphical_debug::{DebugDisplay, DEBUG_DISPLAY}; #[cfg(feature = "qemu_debug")] pub static QEMU: Mutex> = Mutex::new(Pio::::new(0x402)); @@ -22,10 +29,14 @@ pub struct Writer<'a> { log: MutexGuard<'a, Option>, #[cfg(feature = "graphical_debug")] display: MutexGuard<'a, Option>, + #[cfg(feature = "lpss_debug")] + lpss: MutexGuard<'a, Option<&'static mut SerialPort>>>, #[cfg(feature = "qemu_debug")] qemu: MutexGuard<'a, Pio>, #[cfg(feature = "serial_debug")] serial: MutexGuard<'a, SerialPort>>, + #[cfg(feature = "system76_ec_debug")] + system76_ec: MutexGuard<'a, Option>, } impl<'a> Writer<'a> { @@ -34,15 
+45,19 @@ impl<'a> Writer<'a> { log: LOG.lock(), #[cfg(feature = "graphical_debug")] display: DEBUG_DISPLAY.lock(), + #[cfg(feature = "lpss_debug")] + lpss: LPSS.lock(), #[cfg(feature = "qemu_debug")] qemu: QEMU.lock(), #[cfg(feature = "serial_debug")] serial: COM1.lock(), + #[cfg(feature = "system76_ec_debug")] + system76_ec: SYSTEM76_EC.lock(), } } - pub fn write(&mut self, buf: &[u8]) { - { + pub fn write(&mut self, buf: &[u8], preserve: bool) { + if preserve { if let Some(ref mut log) = *self.log { log.write(buf); } @@ -51,7 +66,14 @@ impl<'a> Writer<'a> { #[cfg(feature = "graphical_debug")] { if let Some(ref mut display) = *self.display { - let _ = display.write(buf); + display.write(buf); + } + } + + #[cfg(feature = "lpss_debug")] + { + if let Some(ref mut lpss) = *self.lpss { + lpss.write(buf); } } @@ -66,12 +88,19 @@ impl<'a> Writer<'a> { { self.serial.write(buf); } + + #[cfg(feature = "system76_ec_debug")] + { + if let Some(ref mut system76_ec) = *self.system76_ec { + system76_ec.print_slice(buf); + } + } } } impl<'a> fmt::Write for Writer<'a> { fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> { - self.write(s.as_bytes()); + self.write(s.as_bytes(), true); Ok(()) } } diff --git a/src/arch/x86_shared/device/cpu.rs b/src/arch/x86_shared/device/cpu.rs new file mode 100644 index 00000000..a15b8b49 --- /dev/null +++ b/src/arch/x86_shared/device/cpu.rs @@ -0,0 +1,281 @@ +use core::fmt::{Result, Write}; + +use crate::arch::cpuid::cpuid; + +pub fn cpu_info(w: &mut W) -> Result { + let cpuid = cpuid(); + + if let Some(info) = cpuid.get_vendor_info() { + writeln!(w, "Vendor: {}", info.as_str())?; + } + + if let Some(brand) = cpuid.get_processor_brand_string() { + writeln!(w, "Model: {}", brand.as_str())?; + } + + if let Some(info) = cpuid.get_processor_frequency_info() { + writeln!(w, "CPU Base MHz: {}", info.processor_base_frequency())?; + writeln!(w, "CPU Max MHz: {}", info.processor_max_frequency())?; + writeln!(w, "Bus MHz: {}", info.bus_frequency())?; + } + + write!(w, "Features:")?; + + if let Some(info) = cpuid.get_feature_info() { + if info.has_fpu() { + write!(w, " fpu")? + }; + if info.has_vme() { + write!(w, " vme")? + }; + if info.has_de() { + write!(w, " de")? + }; + if info.has_pse() { + write!(w, " pse")? + }; + if info.has_tsc() { + write!(w, " tsc")? + }; + if info.has_msr() { + write!(w, " msr")? + }; + if info.has_pae() { + write!(w, " pae")? + }; + if info.has_mce() { + write!(w, " mce")? + }; + + if info.has_cmpxchg8b() { + write!(w, " cx8")? + }; + if info.has_apic() { + write!(w, " apic")? + }; + if info.has_sysenter_sysexit() { + write!(w, " sep")? + }; + if info.has_mtrr() { + write!(w, " mtrr")? + }; + if info.has_pge() { + write!(w, " pge")? + }; + if info.has_mca() { + write!(w, " mca")? + }; + if info.has_cmov() { + write!(w, " cmov")? + }; + if info.has_pat() { + write!(w, " pat")? + }; + + if info.has_pse36() { + write!(w, " pse36")? + }; + if info.has_psn() { + write!(w, " psn")? + }; + if info.has_clflush() { + write!(w, " clflush")? + }; + if info.has_ds() { + write!(w, " ds")? + }; + if info.has_acpi() { + write!(w, " acpi")? + }; + if info.has_mmx() { + write!(w, " mmx")? + }; + if info.has_fxsave_fxstor() { + write!(w, " fxsr")? + }; + if info.has_sse() { + write!(w, " sse")? + }; + + if info.has_sse2() { + write!(w, " sse2")? + }; + if info.has_ss() { + write!(w, " ss")? + }; + if info.has_htt() { + write!(w, " ht")? + }; + if info.has_tm() { + write!(w, " tm")? + }; + if info.has_pbe() { + write!(w, " pbe")? 
+ }; + + if info.has_sse3() { + write!(w, " sse3")? + }; + if info.has_pclmulqdq() { + write!(w, " pclmulqdq")? + }; + if info.has_ds_area() { + write!(w, " dtes64")? + }; + if info.has_monitor_mwait() { + write!(w, " monitor")? + }; + if info.has_cpl() { + write!(w, " ds_cpl")? + }; + if info.has_vmx() { + write!(w, " vmx")? + }; + if info.has_smx() { + write!(w, " smx")? + }; + if info.has_eist() { + write!(w, " est")? + }; + + if info.has_tm2() { + write!(w, " tm2")? + }; + if info.has_ssse3() { + write!(w, " ssse3")? + }; + if info.has_cnxtid() { + write!(w, " cnxtid")? + }; + if info.has_fma() { + write!(w, " fma")? + }; + if info.has_cmpxchg16b() { + write!(w, " cx16")? + }; + if info.has_pdcm() { + write!(w, " pdcm")? + }; + if info.has_pcid() { + write!(w, " pcid")? + }; + if info.has_dca() { + write!(w, " dca")? + }; + + if info.has_sse41() { + write!(w, " sse4_1")? + }; + if info.has_sse42() { + write!(w, " sse4_2")? + }; + if info.has_x2apic() { + write!(w, " x2apic")? + }; + if info.has_movbe() { + write!(w, " movbe")? + }; + if info.has_popcnt() { + write!(w, " popcnt")? + }; + if info.has_tsc_deadline() { + write!(w, " tsc_deadline_timer")? + }; + if info.has_aesni() { + write!(w, " aes")? + }; + if info.has_xsave() { + write!(w, " xsave")? + }; + + if info.has_oxsave() { + write!(w, " xsaveopt")? + }; + if info.has_avx() { + write!(w, " avx")? + }; + if info.has_f16c() { + write!(w, " f16c")? + }; + if info.has_rdrand() { + write!(w, " rdrand")? + }; + } + + if let Some(info) = cpuid.get_extended_processor_and_feature_identifiers() { + if info.has_64bit_mode() { + write!(w, " lm")? + }; + if info.has_rdtscp() { + write!(w, " rdtscp")? + }; + if info.has_1gib_pages() { + write!(w, " pdpe1gb")? + }; + if info.has_execute_disable() { + write!(w, " nx")? + }; + if info.has_syscall_sysret() { + write!(w, " syscall")? + }; + if info.has_prefetchw() { + write!(w, " prefetchw")? + }; + if info.has_lzcnt() { + write!(w, " lzcnt")? + }; + if info.has_lahf_sahf() { + write!(w, " lahf_lm")? + }; + } + + if let Some(info) = cpuid.get_advanced_power_mgmt_info() { + if info.has_invariant_tsc() { + write!(w, " constant_tsc")? + }; + } + + if let Some(info) = cpuid.get_extended_feature_info() { + if info.has_fsgsbase() { + write!(w, " fsgsbase")? + }; + if info.has_tsc_adjust_msr() { + write!(w, " tsc_adjust")? + }; + if info.has_bmi1() { + write!(w, " bmi1")? + }; + if info.has_hle() { + write!(w, " hle")? + }; + if info.has_avx2() { + write!(w, " avx2")? + }; + if info.has_smep() { + write!(w, " smep")? + }; + if info.has_bmi2() { + write!(w, " bmi2")? + }; + if info.has_rep_movsb_stosb() { + write!(w, " erms")? + }; + if info.has_invpcid() { + write!(w, " invpcid")? + }; + if info.has_rtm() { + write!(w, " rtm")? + }; + //if info.has_qm() { write!(w, " qm")? }; + if info.has_fpu_cs_ds_deprecated() { + write!(w, " fpu_seg")? + }; + if info.has_mpx() { + write!(w, " mpx")? 
+ }; + } + + writeln!(w)?; + + Ok(()) +} diff --git a/src/arch/x86_shared/device/hpet.rs b/src/arch/x86_shared/device/hpet.rs new file mode 100644 index 00000000..6b33b4be --- /dev/null +++ b/src/arch/x86_shared/device/hpet.rs @@ -0,0 +1,106 @@ +use super::pit; +use crate::acpi::hpet::Hpet; + +const LEG_RT_CNF: u64 = 2; +const ENABLE_CNF: u64 = 1; + +const TN_VAL_SET_CNF: u64 = 0x40; +const TN_TYPE_CNF: u64 = 0x08; +const TN_INT_ENB_CNF: u64 = 0x04; + +pub(crate) const CAPABILITY_OFFSET: usize = 0x00; +const GENERAL_CONFIG_OFFSET: usize = 0x10; +const GENERAL_INTERRUPT_OFFSET: usize = 0x20; +pub(crate) const MAIN_COUNTER_OFFSET: usize = 0xF0; +// const NUM_TIMER_CAP_MASK: u64 = 0x0f00; +const LEG_RT_CAP: u64 = 0x8000; +const T0_CONFIG_CAPABILITY_OFFSET: usize = 0x100; +pub(crate) const T0_COMPARATOR_OFFSET: usize = 0x108; + +const PER_INT_CAP: u64 = 0x10; + +pub unsafe fn init(hpet: &mut Hpet) -> bool { + println!("HPET Before Init"); + debug(hpet, true); + + // Disable HPET + { + let mut config_word = hpet.read_u64(GENERAL_CONFIG_OFFSET); + config_word &= !(LEG_RT_CNF | ENABLE_CNF); + hpet.write_u64(GENERAL_CONFIG_OFFSET, config_word); + } + + let capability = hpet.read_u64(CAPABILITY_OFFSET); + if capability & LEG_RT_CAP == 0 { + log::warn!("HPET missing capability LEG_RT_CAP"); + return false; + } + + let period_fs = capability >> 32; + let divisor = (pit::RATE as u64 * 1_000_000) / period_fs; + + let t0_capabilities = hpet.read_u64(T0_CONFIG_CAPABILITY_OFFSET); + if t0_capabilities & PER_INT_CAP == 0 { + log::warn!("HPET T0 missing capability PER_INT_CAP"); + return false; + } + + let counter = hpet.read_u64(MAIN_COUNTER_OFFSET); + + let t0_config_word: u64 = TN_VAL_SET_CNF | TN_TYPE_CNF | TN_INT_ENB_CNF; + hpet.write_u64(T0_CONFIG_CAPABILITY_OFFSET, t0_config_word); + // set accumulator value + hpet.write_u64(T0_COMPARATOR_OFFSET, counter + divisor); + // set interval + hpet.write_u64(T0_COMPARATOR_OFFSET, divisor); + + // Enable interrupts from the HPET + { + let mut config_word: u64 = hpet.read_u64(GENERAL_CONFIG_OFFSET); + config_word |= LEG_RT_CNF | ENABLE_CNF; + hpet.write_u64(GENERAL_CONFIG_OFFSET, config_word); + } + + println!("HPET After Init"); + debug(hpet, false); + + true +} + +pub unsafe fn debug(hpet: &mut Hpet, print_caps: bool) { + println!("HPET @ {:#x}", { hpet.base_address.address }); + + if print_caps { + let capability = hpet.read_u64(CAPABILITY_OFFSET); + println!(" caps: {:#x}", capability); + println!(" clock period: {}", (capability >> 32) as u32); + println!(" ID: {:#x}", (capability >> 16) as u16); + println!(" LEG_RT_CAP: {}", capability & (1 << 15) == (1 << 15)); + println!( + " COUNT_SIZE_CAP: {}", + capability & (1 << 13) == (1 << 13) + ); + println!(" timers: {}", (capability >> 8) as u8 & 0x1F); + println!(" revision: {}", capability as u8); + } + + let config_word = hpet.read_u64(GENERAL_CONFIG_OFFSET); + println!(" config: {:#x}", config_word); + + let interrupt_status = hpet.read_u64(GENERAL_INTERRUPT_OFFSET); + println!(" interrupt status: {:#x}", interrupt_status); + + let counter = hpet.read_u64(MAIN_COUNTER_OFFSET); + println!(" counter: {:#x}", counter); + + let t0_capabilities = hpet.read_u64(T0_CONFIG_CAPABILITY_OFFSET); + println!(" T0 caps: {:#x}", t0_capabilities); + println!( + " interrupt routing: {:#x}", + (t0_capabilities >> 32) as u32 + ); + println!(" flags: {:#x}", t0_capabilities as u16); + + let t0_comparator = hpet.read_u64(T0_COMPARATOR_OFFSET); + println!(" T0 comparator: {:#x}", t0_comparator); +} diff --git 
a/src/arch/x86_shared/device/ioapic.rs b/src/arch/x86_shared/device/ioapic.rs new file mode 100644 index 00000000..74fe13be --- /dev/null +++ b/src/arch/x86_shared/device/ioapic.rs @@ -0,0 +1,448 @@ +use core::{cell::SyncUnsafeCell, fmt, ptr}; + +use alloc::vec::Vec; +use spin::Mutex; + +#[cfg(feature = "acpi")] +use crate::acpi::madt::{self, Madt, MadtEntry, MadtIntSrcOverride, MadtIoApic}; + +use crate::{ + arch::interrupt::irq, + memory::{Frame, KernelMapper}, + paging::{entry::EntryFlags, Page, PageFlags, PhysicalAddress}, +}; + +use super::pic; +use crate::arch::cpuid::cpuid; +#[cfg(target_arch = "x86_64")] +use {crate::memory::RmmA, rmm::Arch}; + +pub struct IoApicRegs { + pointer: *const u32, +} +impl IoApicRegs { + fn ioregsel(&self) -> *const u32 { + self.pointer + } + fn iowin(&self) -> *const u32 { + // offset 0x10 + unsafe { self.pointer.offset(4) } + } + fn write_ioregsel(&mut self, value: u32) { + unsafe { ptr::write_volatile::(self.ioregsel() as *mut u32, value) } + } + fn read_iowin(&self) -> u32 { + unsafe { ptr::read_volatile::(self.iowin()) } + } + fn write_iowin(&mut self, value: u32) { + unsafe { ptr::write_volatile::(self.iowin() as *mut u32, value) } + } + fn read_reg(&mut self, reg: u8) -> u32 { + self.write_ioregsel(reg.into()); + self.read_iowin() + } + fn write_reg(&mut self, reg: u8, value: u32) { + self.write_ioregsel(reg.into()); + self.write_iowin(value); + } + pub fn read_ioapicid(&mut self) -> u32 { + self.read_reg(0x00) + } + pub fn read_ioapicver(&mut self) -> u32 { + self.read_reg(0x01) + } + pub fn read_ioredtbl(&mut self, idx: u8) -> u64 { + assert!(idx < 24); + let lo = self.read_reg(0x10 + idx * 2); + let hi = self.read_reg(0x10 + idx * 2 + 1); + + u64::from(lo) | (u64::from(hi) << 32) + } + pub fn write_ioredtbl(&mut self, idx: u8, value: u64) { + assert!(idx < 24); + + let lo = value as u32; + let hi = (value >> 32) as u32; + + self.write_reg(0x10 + idx * 2, lo); + self.write_reg(0x10 + idx * 2 + 1, hi); + } + + pub fn max_redirection_table_entries(&mut self) -> u8 { + let ver = self.read_ioapicver(); + ((ver & 0x00FF_0000) >> 16) as u8 + } + #[allow(dead_code)] + pub fn id(&mut self) -> u8 { + let id_reg = self.read_ioapicid(); + ((id_reg & 0x0F00_0000) >> 24) as u8 + } +} +pub struct IoApic { + regs: Mutex, + gsi_start: u32, + count: u8, +} +unsafe impl Send for IoApic {} +unsafe impl Sync for IoApic {} +impl IoApic { + #[allow(dead_code)] + pub fn new(regs_base: *const u32, gsi_start: u32) -> Self { + let mut regs = IoApicRegs { pointer: regs_base }; + let count = regs.max_redirection_table_entries(); + + Self { + regs: Mutex::new(regs), + gsi_start, + count, + } + } + /// Map an interrupt vector to a physical local APIC ID of a processor (thus physical mode). 
+ #[allow(dead_code)] + pub fn map(&self, idx: u8, info: MapInfo) { + self.regs.lock().write_ioredtbl(idx, info.as_raw()) + } + pub fn set_mask(&self, gsi: u32, mask: bool) { + let idx = (gsi - self.gsi_start) as u8; + let mut guard = self.regs.lock(); + + let mut reg = guard.read_ioredtbl(idx); + reg &= !(1 << 16); + reg |= u64::from(mask) << 16; + guard.write_ioredtbl(idx, reg); + } +} +#[repr(u8)] +#[derive(Clone, Copy, Debug)] +pub enum ApicTriggerMode { + Edge = 0, + Level = 1, +} +#[repr(u8)] +#[derive(Clone, Copy, Debug)] +pub enum ApicPolarity { + ActiveHigh = 0, + ActiveLow = 1, +} +#[repr(u8)] +#[derive(Clone, Copy, Debug)] +#[allow(unused)] +pub enum DestinationMode { + Physical = 0, + Logical = 1, +} +#[repr(u8)] +#[derive(Clone, Copy, Debug)] +#[allow(unused)] +pub enum DeliveryMode { + Fixed = 0b000, + LowestPriority = 0b001, + Smi = 0b010, + Nmi = 0b100, + Init = 0b101, + ExtInt = 0b111, +} + +#[derive(Clone, Copy, Debug)] +pub struct MapInfo { + pub dest: u8, + pub mask: bool, + pub trigger_mode: ApicTriggerMode, + pub polarity: ApicPolarity, + pub dest_mode: DestinationMode, + pub delivery_mode: DeliveryMode, + pub vector: u8, +} + +impl MapInfo { + pub fn as_raw(&self) -> u64 { + assert!(self.vector >= 0x20); + assert!(self.vector <= 0xFE); + + // TODO: Check for reserved fields. + + (u64::from(self.dest) << 56) + | (u64::from(self.mask) << 16) + | ((self.trigger_mode as u64) << 15) + | ((self.polarity as u64) << 13) + | ((self.dest_mode as u64) << 11) + | ((self.delivery_mode as u64) << 8) + | u64::from(self.vector) + } +} + +impl fmt::Debug for IoApic { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + struct RedirTable<'a>(&'a Mutex); + + impl<'a> fmt::Debug for RedirTable<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut guard = self.0.lock(); + + let count = guard.max_redirection_table_entries(); + f.debug_list() + .entries((0..count).map(|i| guard.read_ioredtbl(i))) + .finish() + } + } + + f.debug_struct("IoApic") + .field("redir_table", &RedirTable(&self.regs)) + .field("gsi_start", &self.gsi_start) + .field("count", &self.count) + .finish() + } +} + +#[derive(Clone, Copy, Debug)] +pub enum TriggerMode { + ConformsToSpecs, + Edge, + Level, +} + +#[derive(Clone, Copy, Debug)] +pub enum Polarity { + ConformsToSpecs, + ActiveHigh, + ActiveLow, +} + +#[derive(Clone, Copy, Debug)] +pub struct Override { + bus_irq: u8, + gsi: u32, + + trigger_mode: TriggerMode, + polarity: Polarity, +} + +// static mut because only the AP initializes the I/O Apic, and when that is done, it's solely +// accessed immutably. 
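// Editorial sketch (not part of this patch): a worked example of the redirection-table
// entry layout produced by `MapInfo::as_raw` above. The concrete values (physical APIC
// ID 1, vector 0x21 for legacy IRQ 1) are illustrative assumptions only.
#[cfg(test)]
#[test]
fn ioredtbl_entry_layout_example() {
    let info = MapInfo {
        dest: 1,                              // physical APIC ID of the target CPU
        mask: false,                          // entry is unmasked
        trigger_mode: ApicTriggerMode::Edge,
        polarity: ApicPolarity::ActiveHigh,
        dest_mode: DestinationMode::Physical,
        delivery_mode: DeliveryMode::Fixed,
        vector: 0x21,                         // IDT vector 33, i.e. 32 + legacy IRQ 1
    };
    // dest lands in bits 63:56 and the vector in bits 7:0; every other field is zero here.
    assert_eq!(info.as_raw(), 0x0100_0000_0000_0021);
}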
+static IOAPICS: SyncUnsafeCell>> = SyncUnsafeCell::new(None); + +// static mut for the same reason as above +static SRC_OVERRIDES: SyncUnsafeCell>> = SyncUnsafeCell::new(None); + +pub fn ioapics() -> &'static [IoApic] { + unsafe { &*IOAPICS.get() } + .as_ref() + .map_or(&[], |vector| &vector[..]) +} +pub fn src_overrides() -> &'static [Override] { + unsafe { &*SRC_OVERRIDES.get() } + .as_ref() + .map_or(&[], |vector| &vector[..]) +} + +#[cfg(feature = "acpi")] +pub unsafe fn handle_ioapic(mapper: &mut KernelMapper, madt_ioapic: &'static MadtIoApic) { + // map the I/O APIC registers + + let frame = Frame::containing(PhysicalAddress::new(madt_ioapic.address as usize)); + #[cfg(target_arch = "x86")] + let page = Page::containing_address(rmm::VirtualAddress::new(crate::IOAPIC_OFFSET)); + #[cfg(target_arch = "x86_64")] + let page = Page::containing_address(RmmA::phys_to_virt(frame.base())); + + assert!(mapper.translate(page.start_address()).is_none()); + + mapper + .get_mut() + .expect("expected KernelMapper not to be locked re-entrant while mapping I/O APIC memory") + .map_phys( + page.start_address(), + frame.base(), + PageFlags::new() + .write(true) + .custom_flag(EntryFlags::NO_CACHE.bits(), true), + ) + .expect("failed to map I/O APIC") + .flush(); + + let ioapic_registers = page.start_address().data() as *const u32; + let ioapic = IoApic::new(ioapic_registers, madt_ioapic.gsi_base); + + assert_eq!( + ioapic.regs.lock().id(), + madt_ioapic.id, + "mismatched ACPI MADT I/O APIC ID, and the ID reported by the I/O APIC" + ); + + (*IOAPICS.get()).get_or_insert_with(Vec::new).push(ioapic); +} +#[cfg(feature = "acpi")] +pub unsafe fn handle_src_override(src_override: &'static MadtIntSrcOverride) { + let flags = src_override.flags; + + let polarity_raw = (flags & 0x0003) as u8; + let trigger_mode_raw = ((flags & 0x000C) >> 2) as u8; + + let polarity = match polarity_raw { + 0b00 => Polarity::ConformsToSpecs, + 0b01 => Polarity::ActiveHigh, + 0b10 => return, // reserved + 0b11 => Polarity::ActiveLow, + + _ => unreachable!(), + }; + + let trigger_mode = match trigger_mode_raw { + 0b00 => TriggerMode::ConformsToSpecs, + 0b01 => TriggerMode::Edge, + 0b10 => return, // reserved + 0b11 => TriggerMode::Level, + _ => unreachable!(), + }; + + let over = Override { + bus_irq: src_override.irq_source, + gsi: src_override.gsi_base, + polarity, + trigger_mode, + }; + (*SRC_OVERRIDES.get()) + .get_or_insert_with(Vec::new) + .push(over); +} + +#[allow(dead_code)] +pub unsafe fn init(active_table: &mut KernelMapper) { + let bsp_apic_id = cpuid().get_feature_info().unwrap().initial_local_apic_id(); // TODO: remove unwraps + + // search the madt for all IOAPICs. + #[cfg(feature = "acpi")] + { + let madt: &'static Madt = match madt::madt() { + Some(m) => m, + // TODO: Parse MP tables too. + None => return, + }; + if madt.flags & madt::FLAG_PCAT != 0 { + pic::disable(); + } + + // find all I/O APICs (usually one). 
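        // Editorial note (not part of this patch): a worked example of the MADT flag
        // decoding performed by `handle_src_override` above, assuming the common ACPI
        // SCI override with `flags == 0x000F`:
        //     polarity_raw     = 0x000F & 0x0003        == 0b11 -> Polarity::ActiveLow
        //     trigger_mode_raw = (0x000F & 0x000C) >> 2  == 0b11 -> TriggerMode::Level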
+ + for entry in madt.iter() { + match entry { + MadtEntry::IoApic(ioapic) => handle_ioapic(active_table, ioapic), + MadtEntry::IntSrcOverride(src_override) => handle_src_override(src_override), + _ => (), + } + } + } + println!( + "I/O APICs: {:?}, overrides: {:?}", + ioapics(), + src_overrides() + ); + + // map the legacy PC-compatible IRQs (0-15) to 32-47, just like we did with 8259 PIC (if it + // wouldn't have been disabled due to this I/O APIC) + for legacy_irq in 0..=15 { + let (gsi, trigger_mode, polarity) = match get_override(legacy_irq) { + Some(over) => (over.gsi, over.trigger_mode, over.polarity), + None => { + if src_overrides() + .iter() + .any(|over| over.gsi == u32::from(legacy_irq) && over.bus_irq != legacy_irq) + && !src_overrides() + .iter() + .any(|over| over.bus_irq == legacy_irq) + { + // there's an IRQ conflict, making this legacy IRQ inaccessible. + continue; + } + ( + legacy_irq.into(), + TriggerMode::ConformsToSpecs, + Polarity::ConformsToSpecs, + ) + } + }; + let apic = match find_ioapic(gsi) { + Some(ioapic) => ioapic, + None => { + println!("Unable to find a suitable APIC for legacy IRQ {} (GSI {}). It will not be mapped.", legacy_irq, gsi); + continue; + } + }; + let redir_tbl_index = (gsi - apic.gsi_start) as u8; + + let map_info = MapInfo { + // only send to the BSP + dest: bsp_apic_id, + dest_mode: DestinationMode::Physical, + delivery_mode: DeliveryMode::Fixed, + mask: false, + polarity: match polarity { + Polarity::ActiveHigh => ApicPolarity::ActiveHigh, + Polarity::ActiveLow => ApicPolarity::ActiveLow, + Polarity::ConformsToSpecs => ApicPolarity::ActiveHigh, + }, + trigger_mode: match trigger_mode { + TriggerMode::Edge => ApicTriggerMode::Edge, + TriggerMode::Level => ApicTriggerMode::Level, + TriggerMode::ConformsToSpecs => ApicTriggerMode::Edge, + }, + vector: 32 + legacy_irq, + }; + apic.map(redir_tbl_index, map_info); + } + println!( + "I/O APICs: {:?}, overrides: {:?}", + ioapics(), + src_overrides() + ); + irq::set_irq_method(irq::IrqMethod::Apic); + + // tell the firmware that we're using APIC rather than the default 8259 PIC. + + // FIXME: With ACPI moved to userspace, we should instead allow userspace to check whether the + // IOAPIC has been initialized, and then subsequently let some ACPI driver call the AML from + // userspace. 
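    // Editorial note (not part of this patch): a worked example of the legacy IRQ
    // remapping loop above, assuming the common firmware override "bus IRQ 0 -> GSI 2"
    // (the PIT routed behind the 8259 cascade) and no other overrides:
    //   - legacy_irq 0: get_override(0) yields GSI 2, so the PIT is programmed into the
    //     redirection entry for GSI 2 with vector 32 + 0 = 32;
    //   - legacy_irq 2: an override targets GSI 2 from a different bus IRQ and nothing
    //     remaps bus IRQ 2 elsewhere, so it is treated as a conflict and skipped;
    //   - every other legacy_irq n keeps the identity mapping (GSI n) with vector 32 + n.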
+ + /*#[cfg(feature = "acpi")] + { + let method = { + let namespace_guard = crate::acpi::ACPI_TABLE.namespace.read(); + if let Some(value) = namespace_guard.as_ref().unwrap().get("\\_PIC") { + value.get_as_method().ok() + } else { + None + } + }; + if let Some(m) = method { + m.execute("\\_PIC".into(), vec!(crate::acpi::aml::AmlValue::Integer(1))); + } + }*/ +} +fn get_override(irq: u8) -> Option<&'static Override> { + src_overrides().iter().find(|over| over.bus_irq == irq) +} +fn resolve(irq: u8) -> u32 { + get_override(irq).map_or(u32::from(irq), |over| over.gsi) +} +fn find_ioapic(gsi: u32) -> Option<&'static IoApic> { + ioapics() + .iter() + .find(|apic| gsi >= apic.gsi_start && gsi < apic.gsi_start + u32::from(apic.count)) +} + +pub unsafe fn mask(irq: u8) { + let gsi = resolve(irq); + let apic = match find_ioapic(gsi) { + Some(a) => a, + None => return, + }; + apic.set_mask(gsi, true); +} +pub unsafe fn unmask(irq: u8) { + let gsi = resolve(irq); + let apic = match find_ioapic(gsi) { + Some(a) => a, + None => return, + }; + apic.set_mask(gsi, false); +} diff --git a/src/arch/x86_shared/device/local_apic.rs b/src/arch/x86_shared/device/local_apic.rs new file mode 100644 index 00000000..3dee3eb9 --- /dev/null +++ b/src/arch/x86_shared/device/local_apic.rs @@ -0,0 +1,243 @@ +use core::{ + cell::SyncUnsafeCell, + ptr::{read_volatile, write_volatile}, +}; +use x86::msr::*; + +use crate::{ + ipi::IpiKind, + paging::{PageFlags, PhysicalAddress}, +}; + +use crate::{arch::cpuid::cpuid, memory::KernelMapper}; + +static LOCAL_APIC: SyncUnsafeCell = SyncUnsafeCell::new(LocalApic { + address: 0, + x2: false, +}); +pub unsafe fn the_local_apic() -> &'static mut LocalApic { + &mut *LOCAL_APIC.get() +} + +pub unsafe fn init(active_table: &mut KernelMapper) { + the_local_apic().init(active_table); +} + +pub unsafe fn init_ap() { + the_local_apic().init_ap(); +} + +/// Local APIC +pub struct LocalApic { + pub address: usize, + pub x2: bool, +} + +impl LocalApic { + unsafe fn init(&mut self, mapper: &mut KernelMapper) { + let mapper = mapper + .get_mut() + .expect("expected KernelMapper not to be locked re-entrant while initializing LAPIC"); + + let physaddr = PhysicalAddress::new(rdmsr(IA32_APIC_BASE) as usize & 0xFFFF_0000); + #[cfg(target_arch = "x86")] + let virtaddr = rmm::VirtualAddress::new(crate::LAPIC_OFFSET); + #[cfg(target_arch = "x86_64")] + let virtaddr = { + use rmm::Arch; + crate::memory::RmmA::phys_to_virt(physaddr) + }; + + self.address = virtaddr.data(); + self.x2 = cpuid() + .get_feature_info() + .map_or(false, |feature_info| feature_info.has_x2apic()); + + if !self.x2 { + log::info!("Detected xAPIC at {:#x}", physaddr.data()); + if let Some((_entry, _, flush)) = mapper.unmap_phys(virtaddr, true) { + // Unmap xAPIC page if already mapped + flush.flush(); + } + mapper + .map_phys(virtaddr, physaddr, PageFlags::new().write(true)) + .expect("failed to map local APIC memory") + .flush(); + } else { + log::info!("Detected x2APIC"); + } + + self.init_ap(); + } + + unsafe fn init_ap(&mut self) { + if self.x2 { + wrmsr(IA32_APIC_BASE, rdmsr(IA32_APIC_BASE) | 1 << 10); + wrmsr(IA32_X2APIC_SIVR, 0x100); + } else { + self.write(0xF0, 0x100); + } + self.setup_error_int(); + //self.setup_timer(); + } + + unsafe fn read(&self, reg: u32) -> u32 { + read_volatile((self.address + reg as usize) as *const u32) + } + + unsafe fn write(&mut self, reg: u32, value: u32) { + write_volatile((self.address + reg as usize) as *mut u32, value); + } + + pub fn id(&self) -> u32 { + if self.x2 { + unsafe { 
rdmsr(IA32_X2APIC_APICID) as u32 } + } else { + unsafe { self.read(0x20) } + } + } + + pub fn version(&self) -> u32 { + if self.x2 { + unsafe { rdmsr(IA32_X2APIC_VERSION) as u32 } + } else { + unsafe { self.read(0x30) } + } + } + + pub fn icr(&self) -> u64 { + if self.x2 { + unsafe { rdmsr(IA32_X2APIC_ICR) } + } else { + unsafe { (self.read(0x310) as u64) << 32 | self.read(0x300) as u64 } + } + } + + pub fn set_icr(&mut self, value: u64) { + if self.x2 { + unsafe { + wrmsr(IA32_X2APIC_ICR, value); + } + } else { + unsafe { + const PENDING: u32 = 1 << 12; + while self.read(0x300) & PENDING == PENDING { + core::hint::spin_loop(); + } + self.write(0x310, (value >> 32) as u32); + self.write(0x300, value as u32); + while self.read(0x300) & PENDING == PENDING { + core::hint::spin_loop(); + } + } + } + } + + pub fn ipi(&mut self, apic_id: u32, kind: IpiKind) { + let mut icr = 0x40 | kind as u64; + if self.x2 { + icr |= u64::from(apic_id) << 32; + } else { + icr |= u64::from(apic_id) << 56; + } + self.set_icr(icr); + } + pub fn ipi_nmi(&mut self, apic_id: u32) { + let shift = if self.x2 { 32 } else { 56 }; + self.set_icr((u64::from(apic_id) << shift) | (1 << 14) | (0b100 << 8)); + } + + pub unsafe fn eoi(&mut self) { + if self.x2 { + wrmsr(IA32_X2APIC_EOI, 0); + } else { + self.write(0xB0, 0); + } + } + /// Reads the Error Status Register. + pub unsafe fn esr(&mut self) -> u32 { + if self.x2 { + // update the ESR to the current state of the local apic. + wrmsr(IA32_X2APIC_ESR, 0); + // read the updated value + rdmsr(IA32_X2APIC_ESR) as u32 + } else { + self.write(0x280, 0); + self.read(0x280) + } + } + pub unsafe fn lvt_timer(&mut self) -> u32 { + if self.x2 { + rdmsr(IA32_X2APIC_LVT_TIMER) as u32 + } else { + self.read(0x320) + } + } + pub unsafe fn set_lvt_timer(&mut self, value: u32) { + if self.x2 { + wrmsr(IA32_X2APIC_LVT_TIMER, u64::from(value)); + } else { + self.write(0x320, value); + } + } + pub unsafe fn init_count(&mut self) -> u32 { + if self.x2 { + rdmsr(IA32_X2APIC_INIT_COUNT) as u32 + } else { + self.read(0x380) + } + } + pub unsafe fn set_init_count(&mut self, initial_count: u32) { + if self.x2 { + wrmsr(IA32_X2APIC_INIT_COUNT, u64::from(initial_count)); + } else { + self.write(0x380, initial_count); + } + } + pub unsafe fn cur_count(&mut self) -> u32 { + if self.x2 { + rdmsr(IA32_X2APIC_CUR_COUNT) as u32 + } else { + self.read(0x390) + } + } + pub unsafe fn div_conf(&mut self) -> u32 { + if self.x2 { + rdmsr(IA32_X2APIC_DIV_CONF) as u32 + } else { + self.read(0x3E0) + } + } + pub unsafe fn set_div_conf(&mut self, div_conf: u32) { + if self.x2 { + wrmsr(IA32_X2APIC_DIV_CONF, u64::from(div_conf)); + } else { + self.write(0x3E0, div_conf); + } + } + pub unsafe fn lvt_error(&mut self) -> u32 { + if self.x2 { + rdmsr(IA32_X2APIC_LVT_ERROR) as u32 + } else { + self.read(0x370) + } + } + pub unsafe fn set_lvt_error(&mut self, lvt_error: u32) { + if self.x2 { + wrmsr(IA32_X2APIC_LVT_ERROR, u64::from(lvt_error)); + } else { + self.write(0x370, lvt_error); + } + } + unsafe fn setup_error_int(&mut self) { + let vector = 49u32; + self.set_lvt_error(vector); + } +} + +#[repr(u8)] +pub enum LvtTimerMode { + OneShot = 0b00, + Periodic = 0b01, + TscDeadline = 0b10, +} diff --git a/src/arch/x86_shared/device/mod.rs b/src/arch/x86_shared/device/mod.rs new file mode 100644 index 00000000..5617db38 --- /dev/null +++ b/src/arch/x86_shared/device/mod.rs @@ -0,0 +1,80 @@ +use crate::memory::KernelMapper; + +pub mod cpu; +#[cfg(feature = "acpi")] +pub mod hpet; +pub mod ioapic; +pub mod local_apic; +pub 
mod pic; +pub mod pit; +pub mod serial; +#[cfg(feature = "system76_ec_debug")] +pub mod system76_ec; + +#[cfg(feature = "x86_kvm_pv")] +pub mod tsc; + +pub unsafe fn init() { + pic::init(); + local_apic::init(&mut KernelMapper::lock()); + + // Run here for the side-effect of printing if KVM was used to avoid interleaved logs. + tsc::get_kvm_support(); +} +pub unsafe fn init_after_acpi() { + // this will disable the IOAPIC if needed. + //ioapic::init(mapper); +} + +#[cfg(feature = "acpi")] +unsafe fn init_hpet() -> bool { + use crate::acpi::ACPI_TABLE; + if let Some(ref mut hpet) = *ACPI_TABLE.hpet.write() { + if cfg!(target_arch = "x86") { + //TODO: fix HPET on i686 + log::warn!("HPET found but not implemented on i686"); + return false; + } + hpet::init(hpet) + } else { + false + } +} + +#[cfg(not(feature = "acpi"))] +unsafe fn init_hpet() -> bool { + false +} + +pub unsafe fn init_noncore() { + log::info!("Initializing system timer"); + + #[cfg(feature = "x86_kvm_pv")] + if tsc::init() { + log::info!("TSC used as system clock source"); + } + + if init_hpet() { + log::info!("HPET used as system timer"); + } else { + pit::init(); + log::info!("PIT used as system timer"); + } + + log::info!("Initializing serial"); + serial::init(); + log::info!("Finished initializing devices"); +} + +pub unsafe fn init_ap() { + local_apic::init_ap(); + + #[cfg(feature = "x86_kvm_pv")] + tsc::init(); +} + +#[derive(Default)] +pub struct ArchPercpuMisc { + #[cfg(feature = "x86_kvm_pv")] + pub tsc_info: tsc::TscPercpu, +} diff --git a/src/arch/x86_shared/device/pic.rs b/src/arch/x86_shared/device/pic.rs new file mode 100644 index 00000000..79fe74b2 --- /dev/null +++ b/src/arch/x86_shared/device/pic.rs @@ -0,0 +1,94 @@ +use core::cell::SyncUnsafeCell; + +use crate::{ + arch::interrupt::irq, + syscall::io::{Io, Pio}, +}; + +static MASTER: SyncUnsafeCell<Pic> = SyncUnsafeCell::new(Pic::new(0x20)); +static SLAVE: SyncUnsafeCell<Pic> = SyncUnsafeCell::new(Pic::new(0xA0)); + +// SAFETY: must be main thread +pub unsafe fn master<'a>() -> &'a mut Pic { + &mut *MASTER.get() +} +// SAFETY: must be main thread +pub unsafe fn slave<'a>() -> &'a mut Pic { + &mut *SLAVE.get() +} + +pub unsafe fn init() { + let master = master(); + let slave = slave(); + + // Start initialization + master.cmd.write(0x11); + slave.cmd.write(0x11); + + // Set offsets + master.data.write(0x20); + slave.data.write(0x28); + + // Set up cascade + master.data.write(4); + slave.data.write(2); + + // Set up interrupt mode (1 is 8086/88 mode, 2 is auto EOI) + master.data.write(1); + slave.data.write(1); + + // Unmask interrupts + master.data.write(0); + slave.data.write(0); + + // Ack remaining interrupts + master.ack(); + slave.ack(); + + // probably already set to PIC, but double-check + irq::set_irq_method(irq::IrqMethod::Pic); +} + +pub unsafe fn disable() { + master().data.write(0xFF); + slave().data.write(0xFF); +} + +pub struct Pic { + cmd: Pio<u8>, + data: Pio<u8>, +} + +impl Pic { + pub const fn new(port: u16) -> Pic { + Pic { + cmd: Pio::new(port), + data: Pio::new(port + 1), + } + } + + pub fn ack(&mut self) { + self.cmd.write(0x20); + } + + pub fn mask_set(&mut self, irq: u8) { + assert!(irq < 8); + + let mut mask = self.data.read(); + mask |= 1 << irq; + self.data.write(mask); + } + + pub fn mask_clear(&mut self, irq: u8) { + assert!(irq < 8); + + let mut mask = self.data.read(); + mask &= !(1 << irq); + self.data.write(mask); + } + /// A bitmap of all currently servicing IRQs. 
Spurious IRQs will not have this bit set + pub fn isr(&mut self) -> u8 { + self.cmd.write(0x0A); + self.cmd.read() // note that cmd is read, rather than data + } +} diff --git a/src/arch/x86_shared/device/pit.rs b/src/arch/x86_shared/device/pit.rs new file mode 100644 index 00000000..e275a972 --- /dev/null +++ b/src/arch/x86_shared/device/pit.rs @@ -0,0 +1,46 @@ +use core::cell::SyncUnsafeCell; + +use crate::syscall::io::{Io, Pio}; + +static CHAN0: SyncUnsafeCell<Pio<u8>> = SyncUnsafeCell::new(Pio::new(0x40)); +//pub static mut CHAN1: Pio<u8> = Pio::new(0x41); +//pub static mut CHAN2: Pio<u8> = Pio::new(0x42); +static COMMAND: SyncUnsafeCell<Pio<u8>> = SyncUnsafeCell::new(Pio::new(0x43)); + +// SAFETY: must be externally synced +pub unsafe fn chan0<'a>() -> &'a mut Pio<u8> { + &mut *CHAN0.get() +} +// SAFETY: must be externally synced +pub unsafe fn command<'a>() -> &'a mut Pio<u8> { + &mut *COMMAND.get() +} + +const SELECT_CHAN0: u8 = 0b00 << 6; +const ACCESS_LATCH: u8 = 0b00 << 4; +const ACCESS_LOHI: u8 = 0b11 << 4; +const MODE_2: u8 = 0b010 << 1; + +// 1 / (1.193182 MHz) = 838,095,110 femtoseconds ~= 838.095 ns +pub const PERIOD_FS: u128 = 838_095_110; + +// 4847 / (1.193182 MHz) = 4,062,247 ns ~= 4.1 ms or 246 Hz +pub const CHAN0_DIVISOR: u16 = 4847; + +// Calculated interrupt period in nanoseconds based on divisor and period +pub const RATE: u128 = (CHAN0_DIVISOR as u128 * PERIOD_FS) / 1_000_000; + +pub unsafe fn init() { + command().write(SELECT_CHAN0 | ACCESS_LOHI | MODE_2); + chan0().write(CHAN0_DIVISOR as u8); + chan0().write((CHAN0_DIVISOR >> 8) as u8); +} + +pub unsafe fn read() -> u16 { + command().write(SELECT_CHAN0 | ACCESS_LATCH); + let low = chan0().read(); + let high = chan0().read(); + let counter = ((high as u16) << 8) | (low as u16); + // Counter is inverted, subtract from CHAN0_DIVISOR + CHAN0_DIVISOR.saturating_sub(counter) +} diff --git a/src/arch/x86_shared/device/rtc.rs b/src/arch/x86_shared/device/rtc.rs new file mode 100644 index 00000000..e69de29b diff --git a/src/arch/x86_shared/device/serial.rs b/src/arch/x86_shared/device/serial.rs new file mode 100644 index 00000000..1a487dc4 --- /dev/null +++ b/src/arch/x86_shared/device/serial.rs @@ -0,0 +1,45 @@ +#[cfg(feature = "lpss_debug")] +use crate::syscall::io::Mmio; +use crate::{devices::uart_16550::SerialPort, syscall::io::Pio}; +use spin::Mutex; + +pub static COM1: Mutex<SerialPort<Pio<u8>>> = Mutex::new(SerialPort::<Pio<u8>>::new(0x3F8)); +pub static COM2: Mutex<SerialPort<Pio<u8>>> = Mutex::new(SerialPort::<Pio<u8>>::new(0x2F8)); +// pub static COM3: Mutex<SerialPort<Pio<u8>>> = Mutex::new(SerialPort::<Pio<u8>>::new(0x3E8)); +// pub static COM4: Mutex<SerialPort<Pio<u8>>> = Mutex::new(SerialPort::<Pio<u8>>::new(0x2E8)); + +#[cfg(feature = "lpss_debug")] +pub static LPSS: Mutex<Option<&'static mut SerialPort<Mmio<u32>>>> = Mutex::new(None); + +pub unsafe fn init() { + COM1.lock().init(); + COM2.lock().init(); + + #[cfg(feature = "lpss_debug")] + { + // TODO: Make this configurable + let address = crate::PHYS_OFFSET + 0xFE032000; + + { + use crate::{ + memory::{Frame, PhysicalAddress}, + paging::{entry::EntryFlags, ActivePageTable, Page, VirtualAddress}, + }; + + let mut active_table = ActivePageTable::new(); + let page = Page::containing_address(VirtualAddress::new(address)); + let frame = Frame::containing(PhysicalAddress::new(address - crate::PHYS_OFFSET)); + let result = active_table.map_to( + page, + frame, + EntryFlags::PRESENT | EntryFlags::WRITABLE | EntryFlags::NO_EXECUTE, + ); + result.flush(&mut active_table); + } + + let lpss = SerialPort::<Mmio<u32>>::new(crate::PHYS_OFFSET + 0xFE032000); + lpss.init(); + + *LPSS.lock() = Some(lpss); + } +} diff --git 
a/src/arch/x86_shared/device/system76_ec.rs b/src/arch/x86_shared/device/system76_ec.rs new file mode 100644 index 00000000..dca4d5ef --- /dev/null +++ b/src/arch/x86_shared/device/system76_ec.rs @@ -0,0 +1,89 @@ +use spin::Mutex; +use syscall::io::{Io, Pio}; + +pub static SYSTEM76_EC: Mutex> = Mutex::new(None); + +pub fn init() { + *SYSTEM76_EC.lock() = System76Ec::new(); +} + +pub struct System76Ec { + base: u16, +} + +impl System76Ec { + pub fn new() -> Option { + let mut system76_ec = Self { base: 0x0E00 }; + if system76_ec.probe() { + Some(system76_ec) + } else { + None + } + } + + #[inline(always)] + pub fn read(&mut self, addr: u8) -> u8 { + Pio::::new(self.base + addr as u16).read() + } + + #[inline(always)] + pub fn write(&mut self, addr: u8, data: u8) { + Pio::::new(self.base + addr as u16).write(data) + } + + pub fn probe(&mut self) -> bool { + // Send probe command + self.write(0, 1); + + // Wait for response + let mut timeout = 1_000_000; + while timeout > 0 { + if self.read(0) == 0 { + break; + } + timeout -= 1; + } + if timeout == 0 { + return false; + } + + // Return false on command error + if self.read(1) != 0 { + return false; + } + + // Must receive 0x76, 0xEC as signature + self.read(2) == 0x76 && self.read(3) == 0xEC + } + + pub fn flush(&mut self) { + // Send command + self.write(0, 4); + + // TODO: timeout + while self.read(0) != 0 {} + + // Clear length + self.write(3, 0); + } + + pub fn print(&mut self, byte: u8) { + // Read length + let len = self.read(3); + // Write data at offset + self.write(len + 4, byte); + // Update length + self.write(3, len + 1); + + // If we hit the end of the buffer, or were given a newline, flush + if byte == b'\n' || len >= 128 { + self.flush(); + } + } + + pub fn print_slice(&mut self, bytes: &[u8]) { + for &byte in bytes { + self.print(byte); + } + } +} diff --git a/src/arch/x86_shared/device/tsc.rs b/src/arch/x86_shared/device/tsc.rs new file mode 100644 index 00000000..a8f7c72a --- /dev/null +++ b/src/arch/x86_shared/device/tsc.rs @@ -0,0 +1,159 @@ +use core::{cell::Cell, ptr::addr_of}; + +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::__cpuid; + +#[cfg(target_arch = "x86")] +use core::arch::x86::__cpuid; + +use rmm::Arch; +use spin::Once; + +use crate::{memory::allocate_frame, percpu::PercpuBlock}; + +pub struct KvmSupport { + max_leaf: u32, + supp_feats: KvmFeatureBits, +} +bitflags! 
{ + // https://www.kernel.org/doc/html/latest/virt/kvm/x86/cpuid.html + #[derive(Debug)] + struct KvmFeatureBits: u32 { + const CLOCKSOURCE = 1 << 0; + const CLOCKSOURCE2 = 1 << 3; + const CLOCKSOURCE_STABLE = 1 << 24; + } +} + +// https://www.kernel.org/doc/html/v5.9/virt/kvm/msr.html +#[repr(C, packed)] +#[derive(Clone, Copy, Debug)] +struct PvclockVcpuTimeInfo { + version: u32, + pad: u32, + tsc_timestamp: u64, + system_time: u64, + tsc_to_system_mul: u32, + tsc_shift: i8, + flags: u8, + _pad: [u8; 2], +} + +const MSR_KVM_SYSTEM_TIME_NEW: u32 = 0x4b564d01; +const MSR_KVM_WALL_CLOCK_NEW: u32 = 0x4b564d00; + +pub struct TscPercpu { + vcpu_page: Cell<*const PvclockVcpuTimeInfo>, + prev: Cell, +} +impl Default for TscPercpu { + fn default() -> Self { + Self { + vcpu_page: Cell::new(core::ptr::null()), + prev: Cell::new(0), + } + } +} + +pub fn monotonic_absolute() -> Option { + let inf = &PercpuBlock::current().misc_arch_info.tsc_info; + let ptr = inf.vcpu_page.get(); + if ptr.is_null() { + return None; + } + loop { + unsafe { + let cur_version = addr_of!((*ptr).version).read_volatile(); + if cur_version & 1 == 1 { + continue; + } + let elapsed_ticks = + x86::time::rdtsc().saturating_sub(addr_of!((*ptr).tsc_timestamp).read_volatile()); + let tsc_shift = addr_of!((*ptr).tsc_shift).read_volatile(); + let elapsed = if tsc_shift >= 0 { + elapsed_ticks.checked_shl(tsc_shift as u32).unwrap() + } else { + elapsed_ticks.checked_shr((-tsc_shift) as u32).unwrap() + }; + let system_time = addr_of!((*ptr).system_time).read_volatile(); + let tsc_to_system_mul = addr_of!((*ptr).tsc_to_system_mul).read_volatile(); + let new_version = addr_of!((*ptr).version).read_volatile(); + if new_version != cur_version || new_version & 1 == 1 { + continue; + } + let delta = (u128::from(elapsed) * u128::from(tsc_to_system_mul)) >> 32; + let time = u128::from(system_time) + delta; + let prev = inf.prev.replace(time); + if prev > time { + // TODO + log::error!("TSC wraparound ({prev} > {time})"); + return None; + } + assert!(prev <= time); + return Some(time); + } + } +} + +pub fn get_kvm_support() -> &'static Option { + static KVM_SUPPORT: Once> = Once::new(); + + KVM_SUPPORT.call_once(|| { + let res = unsafe { __cpuid(0x4000_0000) }; + if [res.ebx, res.ecx, res.edx].map(u32::to_le_bytes) != [*b"KVMK", *b"VMKV", *b"M\0\0\0"] { + return None; + } + let max_leaf = res.eax; + if max_leaf < 0x4000_0001 { + return None; + } + let res = unsafe { __cpuid(0x4000_0001) }; + + let supp_feats = KvmFeatureBits::from_bits_retain(res.eax); + + log::info!("Detected KVM paravirtualization support, features {supp_feats:?}"); + + Some(KvmSupport { + max_leaf, + supp_feats, + }) + }) +} + +pub unsafe fn init() -> bool { + let cpuid = crate::cpuid::cpuid(); + if !cpuid.get_feature_info().map_or(false, |f| f.has_tsc()) { + return false; + } + + let kvm_support = get_kvm_support(); + + if let Some(kvm_support) = kvm_support + && kvm_support + .supp_feats + .contains(KvmFeatureBits::CLOCKSOURCE2 | KvmFeatureBits::CLOCKSOURCE_STABLE) + { + let frame = allocate_frame().expect("failed to allocate timer page"); + x86::msr::wrmsr(MSR_KVM_SYSTEM_TIME_NEW, (frame.base().data() as u64) | 1); + let ptr = + crate::paging::RmmA::phys_to_virt(frame.base()).data() as *const PvclockVcpuTimeInfo; + PercpuBlock::current() + .misc_arch_info + .tsc_info + .vcpu_page + .set(ptr); + + /*let tsc_ghz = loop { + let val1 = ptr.read_volatile(); + let val2 = ptr.read_volatile(); + if val1.version & 1 == 1 || val2.version & 1 == 1 || val1.version != val2.version { + 
continue; + } + let val1 + break tsc_hz / 1_000_000_000; + };*/ + true + } else { + false + } +} diff --git a/src/arch/x86_shared/idt.rs b/src/arch/x86_shared/idt.rs new file mode 100644 index 00000000..899c5109 --- /dev/null +++ b/src/arch/x86_shared/idt.rs @@ -0,0 +1,368 @@ +use core::{ + cell::SyncUnsafeCell, + mem, + sync::atomic::{AtomicU32, Ordering}, +}; + +use alloc::boxed::Box; +use hashbrown::HashMap; + +use x86::{ + dtables::{self, DescriptorTablePointer}, + segmentation::Descriptor as X86IdtEntry, +}; + +#[cfg(target_arch = "x86_64")] +use crate::interrupt::irq::{__generic_interrupts_end, __generic_interrupts_start}; +use crate::{cpu_set::LogicalCpuId, interrupt::*, ipi::IpiKind}; + +use spin::RwLock; + +pub static INIT_IDT: SyncUnsafeCell<[IdtEntry; 32]> = SyncUnsafeCell::new([IdtEntry::new(); 32]); + +pub type IdtEntries = [IdtEntry; 256]; +pub type IdtReservations = [AtomicU32; 8]; + +#[repr(C)] +pub struct Idt { + pub(crate) entries: IdtEntries, + reservations: IdtReservations, +} +impl Idt { + pub const fn new() -> Self { + Self { + entries: [IdtEntry::new(); 256], + reservations: new_idt_reservations(), + } + } + #[inline] + pub fn set_reserved_mut(&mut self, index: u8, reserved: bool) { + let byte_index = index / 32; + let bit = index % 32; + + *{ &mut self.reservations[usize::from(byte_index)] }.get_mut() |= + u32::from(reserved) << bit; + } +} + +static INIT_BSP_IDT: SyncUnsafeCell = SyncUnsafeCell::new(Idt::new()); + +// TODO: VecMap? +pub static IDTS: RwLock>> = RwLock::new(None); + +#[inline] +pub fn is_reserved(cpu_id: LogicalCpuId, index: u8) -> bool { + let byte_index = index / 32; + let bit = index % 32; + + { + &IDTS + .read() + .as_ref() + .unwrap() + .get(&cpu_id) + .unwrap() + .reservations[usize::from(byte_index)] + } + .load(Ordering::Acquire) + & (1 << bit) + != 0 +} + +#[inline] +pub fn set_reserved(cpu_id: LogicalCpuId, index: u8, reserved: bool) { + let byte_index = index / 32; + let bit = index % 32; + + { + &IDTS + .read() + .as_ref() + .unwrap() + .get(&cpu_id) + .unwrap() + .reservations[usize::from(byte_index)] + } + .fetch_or(u32::from(reserved) << bit, Ordering::AcqRel); +} + +pub fn available_irqs_iter(cpu_id: LogicalCpuId) -> impl Iterator + 'static { + (32..=254).filter(move |&index| !is_reserved(cpu_id, index)) +} + +#[cfg(target_arch = "x86")] +macro_rules! use_irq( + ( $idt: expr, $number:literal, $func:ident ) => {{ + $idt[$number].set_func($func); + }} +); + +#[cfg(target_arch = "x86")] +macro_rules! 
use_default_irqs( + ($idt:expr) => {{ + use crate::interrupt::irq::*; + default_irqs!($idt, use_irq); + }} +); + +pub unsafe fn init() { + let idt = &mut *INIT_IDT.get(); + set_exceptions(idt); + dtables::lidt(&DescriptorTablePointer::new(&idt)); +} + +fn set_exceptions(idt: &mut [IdtEntry]) { + // Set up exceptions + idt[0].set_func(exception::divide_by_zero); + idt[1].set_func(exception::debug); + idt[2].set_func(exception::non_maskable); + idt[3].set_func(exception::breakpoint); + idt[3].set_flags(IdtFlags::PRESENT | IdtFlags::RING_3 | IdtFlags::INTERRUPT); + idt[4].set_func(exception::overflow); + idt[5].set_func(exception::bound_range); + idt[6].set_func(exception::invalid_opcode); + idt[7].set_func(exception::device_not_available); + idt[8].set_func(exception::double_fault); + // 9 no longer available + idt[10].set_func(exception::invalid_tss); + idt[11].set_func(exception::segment_not_present); + idt[12].set_func(exception::stack_segment); + idt[13].set_func(exception::protection); + idt[14].set_func(exception::page); + // 15 reserved + idt[16].set_func(exception::fpu_fault); + idt[17].set_func(exception::alignment_check); + idt[18].set_func(exception::machine_check); + idt[19].set_func(exception::simd); + idt[20].set_func(exception::virtualization); + // 21 through 29 reserved + idt[30].set_func(exception::security); + // 31 reserved +} + +const fn new_idt_reservations() -> [AtomicU32; 8] { + [ + AtomicU32::new(0), + AtomicU32::new(0), + AtomicU32::new(0), + AtomicU32::new(0), + AtomicU32::new(0), + AtomicU32::new(0), + AtomicU32::new(0), + AtomicU32::new(0), + ] +} + +/// Initialize the IDT for a processor +pub unsafe fn init_paging_post_heap(cpu_id: LogicalCpuId) { + let mut idts_guard = IDTS.write(); + let idts_btree = idts_guard.get_or_insert_with(HashMap::new); + + if cpu_id == LogicalCpuId::BSP { + idts_btree.insert(cpu_id, &mut *INIT_BSP_IDT.get()); + } else { + let idt = idts_btree + .entry(cpu_id) + .or_insert_with(|| Box::leak(Box::new(Idt::new()))); + init_generic(cpu_id, idt); + } +} + +/// Initializes a fully functional IDT for use before it be moved into the map. This is ONLY called +/// on the BSP, since the kernel heap is ready for the APs. +pub unsafe fn init_paging_bsp() { + init_generic(LogicalCpuId::BSP, &mut *INIT_BSP_IDT.get()); +} + +/// Initializes an IDT for any type of processor. +pub unsafe fn init_generic(cpu_id: LogicalCpuId, idt: &mut Idt) { + let (current_idt, current_reservations) = (&mut idt.entries, &mut idt.reservations); + + let idtr: DescriptorTablePointer = DescriptorTablePointer { + limit: (current_idt.len() * mem::size_of::() - 1) as u16, + base: current_idt.as_ptr() as *const X86IdtEntry, + }; + + let backup_ist = { + // We give Non-Maskable Interrupts, Double Fault, and Machine Check exceptions separate + // stacks, since these (unless we are going to set up NMI watchdogs like Linux does) are + // considered the most fatal, especially Double Faults which are caused by errors __when + // accessing the system IDT__. If that goes wrong, then kernel memory may be partially + // corrupt, and we want a separate stack. + // + // Note that each CPU has its own "backup interrupt stack". + let index = 1_u8; + + // Put them in the 1st entry of the IST. + #[cfg(target_arch = "x86_64")] // TODO: x86 + { + use crate::paging::PAGE_SIZE; + // Allocate 64 KiB of stack space for the backup stack. 
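// (Back-of-the-envelope check, assuming the usual 4 KiB PAGE_SIZE: 4096 << 4 = 65536 bytes,
// i.e. 16 pages, matching the order-4 allocation below, since allocate_p2frame(4) returns
// 2^4 = 16 physically contiguous frames.)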
+ const BACKUP_STACK_SIZE: usize = PAGE_SIZE << 4; + let frames = crate::memory::allocate_p2frame(4) + .expect("failed to allocate pages for backup interrupt stack"); + + use crate::paging::{RmmA, RmmArch}; + + // Physical pages are mapped linearly. So is the linearly mapped virtual memory. + let base_address = RmmA::phys_to_virt(frames.base()); + + // Stack always grows downwards. + let address = base_address.data() + BACKUP_STACK_SIZE; + + (*crate::gdt::pcr()).tss.ist[usize::from(index - 1)] = address as u64; + } + + index + }; + + set_exceptions(current_idt); + current_idt[2].set_ist(backup_ist); + current_idt[8].set_ist(backup_ist); + current_idt[18].set_ist(backup_ist); + + #[cfg(target_arch = "x86_64")] + assert_eq!( + __generic_interrupts_end as usize - __generic_interrupts_start as usize, + 224 * 8 + ); + + #[cfg(target_arch = "x86_64")] + for i in 0..224 { + current_idt[i + 32].set_func(mem::transmute(__generic_interrupts_start as usize + i * 8)); + } + + // reserve bits 31:0, i.e. the first 32 interrupts, which are reserved for exceptions + *current_reservations[0].get_mut() |= 0x0000_0000_FFFF_FFFF; + + if cpu_id == LogicalCpuId::BSP { + // Set up IRQs + current_idt[32].set_func(irq::pit_stack); + current_idt[33].set_func(irq::keyboard); + current_idt[34].set_func(irq::cascade); + current_idt[35].set_func(irq::com2); + current_idt[36].set_func(irq::com1); + current_idt[37].set_func(irq::lpt2); + current_idt[38].set_func(irq::floppy); + current_idt[39].set_func(irq::lpt1); + current_idt[40].set_func(irq::rtc); + current_idt[41].set_func(irq::pci1); + current_idt[42].set_func(irq::pci2); + current_idt[43].set_func(irq::pci3); + current_idt[44].set_func(irq::mouse); + current_idt[45].set_func(irq::fpu); + current_idt[46].set_func(irq::ata1); + current_idt[47].set_func(irq::ata2); + current_idt[48].set_func(irq::lapic_timer); + current_idt[49].set_func(irq::lapic_error); + + // reserve bits 49:32, which are for the standard IRQs, and for the local apic timer and error. + *current_reservations[1].get_mut() |= 0x0003_FFFF; + } else { + // TODO: use_default_irqs! but also the legacy IRQs that are only needed on one CPU + current_idt[49].set_func(irq::lapic_error); + + // reserve bit 49 + *current_reservations[1].get_mut() |= 1 << 17; + } + + #[cfg(target_arch = "x86")] + use_default_irqs!(current_idt); + + // Set IPI handlers + current_idt[IpiKind::Wakeup as usize].set_func(ipi::wakeup); + current_idt[IpiKind::Switch as usize].set_func(ipi::switch); + current_idt[IpiKind::Tlb as usize].set_func(ipi::tlb); + current_idt[IpiKind::Pit as usize].set_func(ipi::pit); + idt.set_reserved_mut(IpiKind::Wakeup as u8, true); + idt.set_reserved_mut(IpiKind::Switch as u8, true); + idt.set_reserved_mut(IpiKind::Tlb as u8, true); + idt.set_reserved_mut(IpiKind::Pit as u8, true); + + #[cfg(target_arch = "x86")] + { + let current_idt = &mut idt.entries; + // Set syscall function + current_idt[0x80].set_func(syscall::syscall); + current_idt[0x80].set_flags(IdtFlags::PRESENT | IdtFlags::RING_3 | IdtFlags::INTERRUPT); + idt.set_reserved_mut(0x80, true); + } + + #[cfg(feature = "profiling")] + crate::profiling::maybe_setup_timer(idt, cpu_id); + + dtables::lidt(&idtr); +} + +bitflags! 
{ + pub struct IdtFlags: u8 { + const PRESENT = 1 << 7; + const RING_0 = 0 << 5; + const RING_1 = 1 << 5; + const RING_2 = 2 << 5; + const RING_3 = 3 << 5; + const SS = 1 << 4; + const INTERRUPT = 0xE; + const TRAP = 0xF; + } +} + +#[derive(Copy, Clone, Debug, Default)] +#[repr(C, packed)] +pub struct IdtEntry { + offsetl: u16, + selector: u16, + zero: u8, + attribute: u8, + offsetm: u16, + #[cfg(target_arch = "x86_64")] + offseth: u32, + #[cfg(target_arch = "x86_64")] + _zero2: u32, +} + +impl IdtEntry { + pub const fn new() -> IdtEntry { + IdtEntry { + offsetl: 0, + selector: 0, + zero: 0, + attribute: 0, + offsetm: 0, + #[cfg(target_arch = "x86_64")] + offseth: 0, + #[cfg(target_arch = "x86_64")] + _zero2: 0, + } + } + + pub fn set_flags(&mut self, flags: IdtFlags) { + self.attribute = flags.bits(); + } + + pub fn set_ist(&mut self, ist: u8) { + assert_eq!( + ist & 0x07, + ist, + "interrupt stack table must be within 0..=7" + ); + self.zero &= 0xF8; + self.zero |= ist; + } + + pub fn set_offset(&mut self, selector: u16, base: usize) { + self.selector = selector; + self.offsetl = base as u16; + self.offsetm = (base >> 16) as u16; + #[cfg(target_arch = "x86_64")] + { + self.offseth = ((base as u64) >> 32) as u32; + } + } + + // A function to set the offset more easily + pub fn set_func(&mut self, func: unsafe extern "C" fn()) { + self.set_flags(IdtFlags::PRESENT | IdtFlags::RING_0 | IdtFlags::INTERRUPT); + self.set_offset((crate::gdt::GDT_KERNEL_CODE as u16) << 3, func as usize); + } +} diff --git a/src/arch/x86_shared/interrupt/ipi.rs b/src/arch/x86_shared/interrupt/ipi.rs new file mode 100644 index 00000000..359e1cef --- /dev/null +++ b/src/arch/x86_shared/interrupt/ipi.rs @@ -0,0 +1,24 @@ +use crate::{context, device::local_apic::the_local_apic, percpu::PercpuBlock}; + +interrupt!(wakeup, || { + the_local_apic().eoi(); +}); + +interrupt!(tlb, || { + PercpuBlock::current().maybe_handle_tlb_shootdown(); + + the_local_apic().eoi(); +}); + +interrupt!(switch, || { + the_local_apic().eoi(); + + let _ = context::switch(); +}); + +interrupt!(pit, || { + the_local_apic().eoi(); + + // Switch after a sufficient amount of time since the last switch. + context::switch::tick(); +}); diff --git a/src/arch/x86_shared/interrupt/mod.rs b/src/arch/x86_shared/interrupt/mod.rs new file mode 100644 index 00000000..4d794fb7 --- /dev/null +++ b/src/arch/x86_shared/interrupt/mod.rs @@ -0,0 +1,43 @@ +//! Interrupt instructions + +pub mod ipi; +pub mod trace; + +pub use super::idt::{available_irqs_iter, is_reserved, set_reserved}; + +/// Clear interrupts +#[inline(always)] +pub unsafe fn disable() { + core::arch::asm!("cli", options(nomem, nostack)); +} + +/// Set interrupts and halt +/// This will atomically wait for the next interrupt +/// Performing enable followed by halt is not guaranteed to be atomic, use this instead! +#[inline(always)] +pub unsafe fn enable_and_halt() { + core::arch::asm!("sti; hlt", options(nomem, nostack)); +} + +/// Set interrupts and nop +/// This will enable interrupts and allow the IF flag to be processed +/// Simply enabling interrupts does not gurantee that they will trigger, use this instead! 
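/// (On x86, `sti` only takes effect after the next instruction boundary; the trailing `nop`
/// provides that one-instruction window, so any pending interrupt can be serviced before the
/// caller continues.)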
+#[inline(always)] +pub unsafe fn enable_and_nop() { + core::arch::asm!("sti; nop", options(nomem, nostack)); +} + +/// Halt instruction +#[inline(always)] +pub unsafe fn halt() { + core::arch::asm!("hlt", options(nomem, nostack)); +} + +/// Pause instruction +/// Safe because it is similar to a NOP, and has no memory effects +#[inline(always)] +pub fn pause() { + unsafe { + core::arch::asm!("pause", options(nomem, nostack)); + } +} diff --git a/src/arch/x86_shared/interrupt/trace.rs b/src/arch/x86_shared/interrupt/trace.rs new file mode 100644 index 00000000..9df85d66 --- /dev/null +++ b/src/arch/x86_shared/interrupt/trace.rs @@ -0,0 +1,31 @@ +use core::mem; + +pub struct StackTrace { + pub fp: usize, + pub pc_ptr: *const usize, +} + +impl StackTrace { + #[inline(always)] + pub unsafe fn start() -> Option { + let mut fp: usize; + #[cfg(target_arch = "x86")] + core::arch::asm!("mov {}, ebp", out(reg) fp); + #[cfg(target_arch = "x86_64")] + core::arch::asm!("mov {}, rbp", out(reg) fp); + let pc_ptr = fp.checked_add(mem::size_of::())?; + Some(Self { + fp, + pc_ptr: pc_ptr as *const usize, + }) + } + + pub unsafe fn next(self) -> Option { + let fp = *(self.fp as *const usize); + let pc_ptr = fp.checked_add(mem::size_of::())?; + Some(Self { + fp: fp, + pc_ptr: pc_ptr as *const usize, + }) + } +} diff --git a/src/arch/x86_shared/ipi.rs b/src/arch/x86_shared/ipi.rs new file mode 100644 index 00000000..440489ed --- /dev/null +++ b/src/arch/x86_shared/ipi.rs @@ -0,0 +1,55 @@ +#[derive(Clone, Copy, Debug)] +#[repr(u8)] +pub enum IpiKind { + Wakeup = 0x40, + Tlb = 0x41, + Switch = 0x42, + Pit = 0x43, + + #[cfg(feature = "profiling")] + Profile = 0x44, +} + +#[derive(Clone, Copy, Debug)] +#[repr(u8)] +pub enum IpiTarget { + Current = 1, + All = 2, + Other = 3, +} + +#[cfg(not(feature = "multi_core"))] +#[inline(always)] +pub fn ipi(_kind: IpiKind, _target: IpiTarget) {} + +#[cfg(feature = "multi_core")] +#[inline(always)] +pub fn ipi(kind: IpiKind, target: IpiTarget) { + use crate::device::local_apic::the_local_apic; + + #[cfg(feature = "profiling")] + if matches!(kind, IpiKind::Profile) { + let icr = (target as u64) << 18 | 1 << 14 | 0b100 << 8; + unsafe { the_local_apic().set_icr(icr) }; + return; + } + + let icr = (target as u64) << 18 | 1 << 14 | (kind as u64); + unsafe { the_local_apic().set_icr(icr) }; +} +use crate::cpu_set::LogicalCpuId; + +#[cfg(feature = "multi_core")] +#[inline(always)] +pub fn ipi_single(kind: IpiKind, target: LogicalCpuId) { + use crate::device::local_apic::the_local_apic; + + unsafe { + // TODO: Distinguish between logical and physical CPU IDs + the_local_apic().ipi(target.get(), kind); + } +} + +#[cfg(not(feature = "multi_core"))] +#[inline(always)] +pub fn ipi_single(_kind: IpiKind, _target: LogicalCpuId) {} diff --git a/src/arch/x86_shared/mod.rs b/src/arch/x86_shared/mod.rs new file mode 100644 index 00000000..f92099d1 --- /dev/null +++ b/src/arch/x86_shared/mod.rs @@ -0,0 +1,26 @@ +/// CPUID wrapper +pub mod cpuid; + +/// Debugging support +pub mod debug; + +/// Devices +pub mod device; + +/// Interrupt descriptor table +pub mod idt; + +/// Interrupt instructions +#[macro_use] +pub mod interrupt; + +/// Inter-processor interrupts +pub mod ipi; + +/// Page table isolation +pub mod pti; + +/// Stop function +pub mod stop; + +pub mod time; diff --git a/src/arch/x86_64/pti.rs b/src/arch/x86_shared/pti.rs similarity index 72% rename from src/arch/x86_64/pti.rs rename to src/arch/x86_shared/pti.rs index 33637155..24866c31 100644 --- a/src/arch/x86_64/pti.rs +++ 
b/src/arch/x86_shared/pti.rs @@ -2,11 +2,11 @@ use core::ptr; #[cfg(feature = "pti")] -use memory::Frame; +use crate::memory::Frame; #[cfg(feature = "pti")] -use paging::ActivePageTable; +use crate::paging::entry::EntryFlags; #[cfg(feature = "pti")] -use paging::entry::EntryFlags; +use crate::paging::ActivePageTable; #[cfg(feature = "pti")] #[thread_local] @@ -20,19 +20,15 @@ pub static mut PTI_CONTEXT_STACK: usize = 0; #[inline(always)] unsafe fn switch_stack(old: usize, new: usize) { let old_rsp: usize; - asm!("" : "={rsp}"(old_rsp) : : : "intel", "volatile"); + asm!("", out("rsp") old_rsp); let offset_rsp = old - old_rsp; let new_rsp = new - offset_rsp; - ptr::copy_nonoverlapping( - old_rsp as *const u8, - new_rsp as *mut u8, - offset_rsp - ); + ptr::copy_nonoverlapping(old_rsp as *const u8, new_rsp as *mut u8, offset_rsp); - asm!("" : : "{rsp}"(new_rsp) : : "intel", "volatile"); + asm!("", out("rsp") new_rsp); } #[cfg(feature = "pti")] @@ -43,7 +39,7 @@ pub unsafe fn map() { // // // Map kernel heap // let address = active_table.p4()[::KERNEL_HEAP_PML4].address(); - // let frame = Frame::containing_address(address); + // let frame = Frame::containing(address); // let mut flags = active_table.p4()[::KERNEL_HEAP_PML4].flags(); // flags.remove(EntryFlags::PRESENT); // active_table.p4_mut()[::KERNEL_HEAP_PML4].set(frame, flags); @@ -53,21 +49,27 @@ pub unsafe fn map() { // } // Switch to per-context stack - switch_stack(PTI_CPU_STACK.as_ptr() as usize + PTI_CPU_STACK.len(), PTI_CONTEXT_STACK); + switch_stack( + PTI_CPU_STACK.as_ptr() as usize + PTI_CPU_STACK.len(), + PTI_CONTEXT_STACK, + ); } #[cfg(feature = "pti")] #[inline(always)] -pub unsafe fn unmap() { +pub unsafe extern "C" fn unmap() { // Switch to per-CPU stack - switch_stack(PTI_CONTEXT_STACK, PTI_CPU_STACK.as_ptr() as usize + PTI_CPU_STACK.len()); + switch_stack( + PTI_CONTEXT_STACK, + PTI_CPU_STACK.as_ptr() as usize + PTI_CPU_STACK.len(), + ); // { // let mut active_table = unsafe { ActivePageTable::new() }; // // // Unmap kernel heap // let address = active_table.p4()[::KERNEL_HEAP_PML4].address(); - // let frame = Frame::containing_address(address); + // let frame = Frame::containing(address); // let mut flags = active_table.p4()[::KERNEL_HEAP_PML4].flags(); // flags.insert(EntryFlags::PRESENT); // active_table.p4_mut()[::KERNEL_HEAP_PML4].set(frame, flags); @@ -83,4 +85,4 @@ pub unsafe fn map() {} #[cfg(not(feature = "pti"))] #[inline(always)] -pub unsafe fn unmap() {} +pub unsafe extern "C" fn unmap() {} diff --git a/src/arch/x86_shared/stop.rs b/src/arch/x86_shared/stop.rs new file mode 100644 index 00000000..0a573011 --- /dev/null +++ b/src/arch/x86_shared/stop.rs @@ -0,0 +1,109 @@ +#[cfg(feature = "acpi")] +use crate::{context, scheme::acpi, time}; + +use crate::syscall::io::{Io, Pio}; + +pub unsafe fn kreset() -> ! { + log::info!("kreset"); + + // 8042 reset + { + println!("Reset with 8042"); + let mut port = Pio::::new(0x64); + while port.readf(2) {} + port.write(0xFE); + } + + emergency_reset(); +} + +#[cfg(target_arch = "x86")] +pub unsafe fn emergency_reset() -> ! { + // Use triple fault to guarantee reset + core::arch::asm!( + " + cli + sidt [esp+16] + // set IDT limit to zero + mov word ptr [esp+16], 0 + lidt [esp+16] + int $3 + ", + options(noreturn) + ); +} + +#[cfg(target_arch = "x86_64")] +pub unsafe fn emergency_reset() -> ! 
{ + // Use triple fault to guarantee reset + core::arch::asm!( + " + cli + sidt [rsp+16] + // set IDT limit to zero + mov word ptr [rsp+16], 0 + lidt [rsp+16] + int $3 + ", + options(noreturn) + ); +} + +#[cfg(feature = "acpi")] +fn userspace_acpi_shutdown() { + log::info!("Notifying any potential ACPI driver"); + // Tell whatever driver that handles ACPI, that it should enter the S5 state (i.e. + // shutdown). + if !acpi::register_kstop() { + // There was no context to switch to. + log::info!("No ACPI driver was alive to handle shutdown."); + return; + } + log::info!("Waiting one second for ACPI driver to run the shutdown sequence."); + let initial = time::monotonic(); + + // Since this driver is a userspace process, and we do not use any magic like directly + // context switching, we have to wait for the userspace driver to complete, with a timeout. + // + // We switch context, and wait for one second. + loop { + // TODO: Switch directly to whichever process is handling the kstop pipe. We would add an + // event flag like EVENT_DIRECT, which has already been suggested for IRQs. + // TODO: Waitpid with timeout? Because, what if the ACPI driver would crash? + let _ = context::switch(); + + let current = time::monotonic(); + if current - initial > time::NANOS_PER_SEC { + log::info!("Timeout reached, thus falling back to other shutdown methods."); + return; + } + } +} + +pub unsafe fn kstop() -> ! { + log::info!("Running kstop()"); + + #[cfg(feature = "acpi")] + userspace_acpi_shutdown(); + + // Magic shutdown code for bochs and qemu (older versions). + for c in "Shutdown".bytes() { + let port = 0x8900; + println!("Shutdown with outb(0x{:X}, '{}')", port, c as char); + Pio::::new(port).write(c); + } + + // Magic shutdown using qemu default ACPI method + { + let port = 0x604; + let data = 0x2000; + println!("Shutdown with outb(0x{:X}, 0x{:X})", port, data); + Pio::::new(port).write(data); + } + + // Magic code for VMWare. Also a hard lock. + println!("Shutdown with cli hlt"); + loop { + core::arch::asm!("cli; hlt"); + } +} diff --git a/src/arch/x86_shared/time.rs b/src/arch/x86_shared/time.rs new file mode 100644 index 00000000..d953a9c6 --- /dev/null +++ b/src/arch/x86_shared/time.rs @@ -0,0 +1,49 @@ +#[cfg(feature = "acpi")] +use super::device::hpet; +use super::device::pit; + +pub fn monotonic_absolute() -> u128 { + // The paravirtualized TSC is already guaranteed to be monotonic, and thus doesn't need to be + // readjusted. + #[cfg(feature = "x86_kvm_pv")] + if let Some(ns) = super::device::tsc::monotonic_absolute() { + return ns; + } + + *crate::time::OFFSET.lock() + hpet_or_pit() +} +fn hpet_or_pit() -> u128 { + #[cfg(feature = "acpi")] + if let Some(ref hpet) = *crate::acpi::ACPI_TABLE.hpet.read() { + //TODO: handle rollover? 
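// Sketch of the arithmetic done below: period_fs (femtoseconds per HPET tick) comes from the
// capability register, divisor = RATE * 1_000_000 / period_fs is the number of HPET ticks per
// PIT-rate interrupt, and the comparator holds the count of the *next* interrupt, so
// last_interrupt = comparator - divisor and the nanoseconds since that interrupt are
// (counter - last_interrupt) * period_fs / 1_000_000.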
+ //TODO: improve performance + + // Current count + let counter = unsafe { hpet.read_u64(hpet::MAIN_COUNTER_OFFSET) }; + // Comparator holds next interrupt count + let comparator = unsafe { hpet.read_u64(hpet::T0_COMPARATOR_OFFSET) }; + // Get period in femtoseconds + let capability = unsafe { hpet.read_u64(hpet::CAPABILITY_OFFSET) }; + + // There seems to be a bug in qemu on macos that causes the calculation to produce 0 for + // period_fs and hence a divide by zero calculating the divisor - workaround it while we + // try and get a fix from qemu: https://gitlab.com/qemu-project/qemu/-/issues/1570 + let mut period_fs = capability >> 32; + if period_fs == 0 { + period_fs = 10_000_000; + } + + // Calculate divisor + let divisor = (pit::RATE as u64 * 1_000_000) / period_fs; + // Calculate last interrupt + let last_interrupt = comparator.saturating_sub(divisor); + // Calculate ticks since last interrupt + let elapsed = counter.saturating_sub(last_interrupt); + // Calculate nanoseconds since last interrupt + return (elapsed as u128 * period_fs as u128) / 1_000_000; + } + // Read ticks since last interrupt + let elapsed = unsafe { pit::read() }; + // Calculate nanoseconds since last interrupt + (elapsed as u128 * pit::PERIOD_FS) / 1_000_000 +} diff --git a/src/asm/x86/trampoline.asm b/src/asm/x86/trampoline.asm new file mode 100644 index 00000000..6a11b404 --- /dev/null +++ b/src/asm/x86/trampoline.asm @@ -0,0 +1,170 @@ +; trampoline for bringing up APs +; compiled with nasm by build.rs, and included in src/acpi/madt.rs + +ORG 0x8000 +SECTION .text +USE16 + +trampoline: + jmp short startup_ap + times 8 - ($ - trampoline) nop + .ready: dq 0 + .cpu_id: dq 0 + .page_table: dq 0 + .stack_start: dq 0 + .stack_end: dq 0 + .code: dq 0 + +startup_ap: + cli + + xor ax, ax + mov ds, ax + mov es, ax + mov ss, ax + + ; initialize stack to invalid value + mov sp, 0 + + ; cr3 holds pointer to PML4 + mov edi, [trampoline.page_table] + mov cr3, edi + + ; enable FPU + mov eax, cr0 + and al, 11110011b ; Clear task switched (3) and emulation (2) + or al, 00100010b ; Set numeric error (5) monitor co-processor (1) + mov cr0, eax + + ; 9: FXSAVE/FXRSTOR + ; 7: Page Global + ; 4: Page Size Extension + mov eax, cr4 + or eax, 1 << 9 | 1 << 7 | 1 << 4 + mov cr4, eax + + ; initialize floating point registers + fninit + + ; load protected mode GDT + lgdt [gdtr] + + ;enabling paging and protection simultaneously + mov ebx, cr0 + ; 31: Paging + ; 16: write protect kernel + ; 0: Protected Mode + or ebx, 1 << 31 | 1 << 16 | 1 + mov cr0, ebx + + ; far jump to enable Protected Mode and load CS with 32 bit segment + jmp gdt.kernel_code:protected_mode_ap + +USE32 +protected_mode_ap: + mov eax, gdt.kernel_data + mov ds, eax + mov es, eax + mov fs, eax + mov gs, eax + mov ss, eax + + mov eax, [trampoline.stack_end] + lea esp, [eax - 256] + + mov eax, trampoline.cpu_id + push eax + + mov eax, [trampoline.code] + mov dword [trampoline.ready], 1 + call eax +.halt: + cli + hlt + jmp .halt + +struc GDTEntry + .limitl resw 1 + .basel resw 1 + .basem resb 1 + .attribute resb 1 + .flags__limith resb 1 + .baseh resb 1 +endstruc + +attrib: + .present equ 1 << 7 + .ring1 equ 1 << 5 + .ring2 equ 1 << 6 + .ring3 equ 1 << 5 | 1 << 6 + .user equ 1 << 4 +;user + .code equ 1 << 3 +; code + .conforming equ 1 << 2 + .readable equ 1 << 1 +; data + .expand_down equ 1 << 2 + .writable equ 1 << 1 + .accessed equ 1 << 0 +;system +; legacy + .tssAvailabe16 equ 0x1 + .ldt equ 0x2 + .tssBusy16 equ 0x3 + .call16 equ 0x4 + .task equ 0x5 + .interrupt16 equ 0x6 
+ .trap16 equ 0x7 + .tssAvailabe32 equ 0x9 + .tssBusy32 equ 0xB + .call32 equ 0xC + .interrupt32 equ 0xE + .trap32 equ 0xF +; long mode + .ldt32 equ 0x2 + .tssAvailabe64 equ 0x9 + .tssBusy64 equ 0xB + .call64 equ 0xC + .interrupt64 equ 0xE + .trap64 equ 0xF + +flags: + .granularity equ 1 << 7 + .available equ 1 << 4 +;user + .default_operand_size equ 1 << 6 +; code + .long_mode equ 1 << 5 +; data + .reserved equ 1 << 5 + +gdtr: + dw gdt.end + 1 ; size + dq gdt ; offset + +gdt: +.null equ $ - gdt + dq 0 + +.kernel_code equ $ - gdt +istruc GDTEntry + at GDTEntry.limitl, dw 0xFFFF + at GDTEntry.basel, dw 0 + at GDTEntry.basem, db 0 + at GDTEntry.attribute, db attrib.present | attrib.user | attrib.code | attrib.readable + at GDTEntry.flags__limith, db 0xF | flags.granularity | flags.default_operand_size + at GDTEntry.baseh, db 0 +iend + +.kernel_data equ $ - gdt +istruc GDTEntry + at GDTEntry.limitl, dw 0xFFFF + at GDTEntry.basel, dw 0 + at GDTEntry.basem, db 0 + at GDTEntry.attribute, db attrib.present | attrib.user | attrib.writable + at GDTEntry.flags__limith, db 0xF | flags.granularity | flags.default_operand_size + at GDTEntry.baseh, db 0 +iend + +.end equ $ - gdt diff --git a/src/asm/x86_64/trampoline.asm b/src/asm/x86_64/trampoline.asm new file mode 100644 index 00000000..538ff1b7 --- /dev/null +++ b/src/asm/x86_64/trampoline.asm @@ -0,0 +1,173 @@ +; trampoline for bringing up APs +; compiled with nasm by build.rs, and included in src/acpi/madt.rs + +ORG 0x8000 +SECTION .text +USE16 + +trampoline: + jmp short startup_ap + times 8 - ($ - trampoline) nop + .ready: dq 0 + .cpu_id: dq 0 + .page_table: dq 0 + .stack_start: dq 0 + .stack_end: dq 0 + .code: dq 0 + +startup_ap: + cli + + xor ax, ax + mov ds, ax + mov es, ax + mov ss, ax + + ; initialize stack to invalid value + mov sp, 0 + + ; cr3 holds pointer to PML4 + mov edi, [trampoline.page_table] + mov cr3, edi + + ; enable FPU + mov eax, cr0 + and al, 11110011b ; Clear task switched (3) and emulation (2) + or al, 00100010b ; Set numeric error (5) monitor co-processor (1) + mov cr0, eax + + ; 9: FXSAVE/FXRSTOR + ; 7: Page Global + ; 5: Page Address Extension + ; 4: Page Size Extension + mov eax, cr4 + or eax, 1 << 9 | 1 << 7 | 1 << 5 | 1 << 4 + mov cr4, eax + + ; initialize floating point registers + fninit + + ; load protected mode GDT + lgdt [gdtr] + + ; enable long mode + mov ecx, 0xC0000080 ; Read from the EFER MSR. + rdmsr + or eax, 1 << 11 | 1 << 8 ; Set the Long-Mode-Enable and NXE bit. 
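    ; (EFER is MSR 0xC0000080: bit 8 is LME, bit 11 is NXE; the CPU sets LMA by itself once
    ; paging is enabled through CR0 below.)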
+ wrmsr + + ; enabling paging and protection simultaneously + mov ebx, cr0 + ; 31: Paging + ; 16: write protect kernel + ; 0: Protected Mode + or ebx, 1 << 31 | 1 << 16 | 1 + mov cr0, ebx + + ; far jump to enable Long Mode and load CS with 64 bit segment + jmp gdt.kernel_code:long_mode_ap + +USE64 +long_mode_ap: + mov rax, gdt.kernel_data + mov ds, rax + mov es, rax + mov fs, rax + mov gs, rax + mov ss, rax + + mov rcx, [trampoline.stack_end] + lea rsp, [rcx - 256] + + mov rdi, trampoline.cpu_id + + mov rax, [trampoline.code] + mov qword [trampoline.ready], 1 + jmp rax + +struc GDTEntry + .limitl resw 1 + .basel resw 1 + .basem resb 1 + .attribute resb 1 + .flags__limith resb 1 + .baseh resb 1 +endstruc + +attrib: + .present equ 1 << 7 + .ring1 equ 1 << 5 + .ring2 equ 1 << 6 + .ring3 equ 1 << 5 | 1 << 6 + .user equ 1 << 4 +;user + .code equ 1 << 3 +; code + .conforming equ 1 << 2 + .readable equ 1 << 1 +; data + .expand_down equ 1 << 2 + .writable equ 1 << 1 + .accessed equ 1 << 0 +;system +; legacy + .tssAvailabe16 equ 0x1 + .ldt equ 0x2 + .tssBusy16 equ 0x3 + .call16 equ 0x4 + .task equ 0x5 + .interrupt16 equ 0x6 + .trap16 equ 0x7 + .tssAvailabe32 equ 0x9 + .tssBusy32 equ 0xB + .call32 equ 0xC + .interrupt32 equ 0xE + .trap32 equ 0xF +; long mode + .ldt32 equ 0x2 + .tssAvailabe64 equ 0x9 + .tssBusy64 equ 0xB + .call64 equ 0xC + .interrupt64 equ 0xE + .trap64 equ 0xF + +flags: + .granularity equ 1 << 7 + .available equ 1 << 4 +;user + .default_operand_size equ 1 << 6 +; code + .long_mode equ 1 << 5 +; data + .reserved equ 1 << 5 + +gdtr: + dw gdt.end + 1 ; size + dq gdt ; offset + +gdt: +.null equ $ - gdt + dq 0 + +.kernel_code equ $ - gdt +istruc GDTEntry + at GDTEntry.limitl, dw 0 + at GDTEntry.basel, dw 0 + at GDTEntry.basem, db 0 + at GDTEntry.attribute, db attrib.present | attrib.user | attrib.code + at GDTEntry.flags__limith, db flags.long_mode + at GDTEntry.baseh, db 0 +iend + +.kernel_data equ $ - gdt +istruc GDTEntry + at GDTEntry.limitl, dw 0 + at GDTEntry.basel, dw 0 + at GDTEntry.basem, db 0 +; AMD System Programming Manual states that the writeable bit is ignored in long mode, but ss can not be set to this descriptor without it + at GDTEntry.attribute, db attrib.present | attrib.user | attrib.writable + at GDTEntry.flags__limith, db 0 + at GDTEntry.baseh, db 0 +iend + +.end equ $ - gdt diff --git a/src/common/aligned_box.rs b/src/common/aligned_box.rs new file mode 100644 index 00000000..c88aef32 --- /dev/null +++ b/src/common/aligned_box.rs @@ -0,0 +1,122 @@ +use core::alloc::{GlobalAlloc, Layout}; + +use crate::{common::unique::Unique, memory::Enomem}; + +// Necessary because GlobalAlloc::dealloc requires the layout to be the same, and therefore Box +// cannot be used for increased alignment directly. +// TODO: move to common? 
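// Carrying the alignment in the type (a const parameter) lets Drop rebuild the exact Layout
// that alloc_zeroed was called with. Illustrative use only, with assumed generic arguments:
//     let buf: AlignedBox<[u8; 4096], 4096> = AlignedBox::try_zeroed().unwrap();
// which allocates a zeroed, 4096-byte-aligned buffer and frees it with that same layout on drop.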
+pub struct AlignedBox { + inner: Unique, +} +pub unsafe trait ValidForZero {} +unsafe impl ValidForZero for [u8; N] {} +unsafe impl ValidForZero for u8 {} + +impl AlignedBox { + fn layout(&self) -> Layout { + layout_upgrade_align(Layout::for_value::(&*self), ALIGN) + } +} +const fn layout_upgrade_align(layout: Layout, align: usize) -> Layout { + const fn max(a: usize, b: usize) -> usize { + if a > b { + a + } else { + b + } + } + let Ok(x) = Layout::from_size_align(layout.size(), max(align, layout.align())) else { + panic!("failed to calculate layout"); + }; + x +} + +impl AlignedBox { + #[inline(always)] + pub fn try_zeroed() -> Result + where + T: ValidForZero, + { + Ok(unsafe { + let ptr = + crate::ALLOCATOR.alloc_zeroed(layout_upgrade_align(Layout::new::(), ALIGN)); + if ptr.is_null() { + return Err(Enomem); + } + Self { + inner: Unique::new_unchecked(ptr.cast()), + } + }) + } +} +impl AlignedBox<[T], ALIGN> { + #[inline] + pub fn try_zeroed_slice(len: usize) -> Result + where + T: ValidForZero, + { + Ok(unsafe { + let ptr = crate::ALLOCATOR.alloc_zeroed(layout_upgrade_align( + Layout::array::(len).unwrap(), + ALIGN, + )); + if ptr.is_null() { + return Err(Enomem); + } + Self { + inner: Unique::new_unchecked(core::ptr::slice_from_raw_parts_mut(ptr.cast(), len)), + } + }) + } +} + +impl core::fmt::Debug for AlignedBox { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!( + f, + "[aligned box at {:p}, size {} alignment {}]", + self.inner.as_ptr(), + self.layout().size(), + self.layout().align() + ) + } +} +impl Drop for AlignedBox { + fn drop(&mut self) { + unsafe { + let layout = self.layout(); + core::ptr::drop_in_place(self.inner.as_ptr()); + crate::ALLOCATOR.dealloc(self.inner.as_ptr().cast(), layout); + } + } +} +impl core::ops::Deref for AlignedBox { + type Target = T; + + fn deref(&self) -> &Self::Target { + unsafe { &*self.inner.as_ptr() } + } +} +impl core::ops::DerefMut for AlignedBox { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut *self.inner.as_ptr() } + } +} +impl Clone for AlignedBox { + fn clone(&self) -> Self { + let mut new = + Self::try_zeroed().unwrap_or_else(|_| alloc::alloc::handle_alloc_error(self.layout())); + T::clone_from(&mut new, self); + new + } +} +impl Clone for AlignedBox<[T], ALIGN> { + fn clone(&self) -> Self { + let mut new = Self::try_zeroed_slice(self.len()) + .unwrap_or_else(|_| alloc::alloc::handle_alloc_error(self.layout())); + for i in 0..self.len() { + new[i].clone_from(&self[i]); + } + new + } +} diff --git a/src/common/int_like.rs b/src/common/int_like.rs index 103e5092..bad1534a 100644 --- a/src/common/int_like.rs +++ b/src/common/int_like.rs @@ -25,19 +25,34 @@ #[macro_export] macro_rules! 
int_like { ($new_type_name:ident, $backing_type: ident) => { - #[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)] + #[derive(Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone, Copy)] pub struct $new_type_name($backing_type); impl $new_type_name { #[allow(dead_code)] - pub const fn into(self) -> $backing_type { + #[inline] + pub const fn get(self) -> $backing_type { self.0 } #[allow(dead_code)] - pub const fn from(x: $backing_type) -> Self { + #[inline] + pub const fn new(x: $backing_type) -> Self { $new_type_name(x) } } + + impl ::core::convert::From<$backing_type> for $new_type_name { + #[inline] + fn from(inner: $backing_type) -> Self { + Self::new(inner) + } + } + impl ::core::convert::From<$new_type_name> for $backing_type { + #[inline] + fn from(wrapped: $new_type_name) -> Self { + wrapped.get() + } + } }; ($new_type_name:ident, $new_atomic_type_name: ident, $backing_type:ident, $backing_atomic_type:ident) => { @@ -51,63 +66,97 @@ macro_rules! int_like { impl $new_atomic_type_name { #[allow(dead_code)] + #[inline] pub const fn new(x: $new_type_name) -> Self { $new_atomic_type_name { - container: $backing_atomic_type::new(x.into()) + container: $backing_atomic_type::new(x.get()), } } #[allow(dead_code)] - pub const fn default() -> Self { - Self::new($new_type_name::from(0)) - } - #[allow(dead_code)] + #[inline] pub fn load(&self, order: ::core::sync::atomic::Ordering) -> $new_type_name { $new_type_name::from(self.container.load(order)) } #[allow(dead_code)] + #[inline] pub fn store(&self, val: $new_type_name, order: ::core::sync::atomic::Ordering) { self.container.store(val.into(), order) } #[allow(dead_code)] - pub fn swap(&self, val: $new_type_name, order: ::core::sync::atomic::Ordering) -> $new_type_name { + #[inline] + pub fn swap( + &self, + val: $new_type_name, + order: ::core::sync::atomic::Ordering, + ) -> $new_type_name { $new_type_name::from(self.container.swap(val.into(), order)) } #[allow(dead_code)] - pub fn compare_and_swap(&self, current: $new_type_name, new: $new_type_name, order: ::core::sync::atomic::Ordering) -> $new_type_name { - $new_type_name::from(self.container.compare_and_swap(current.into(), new.into(), order)) + #[inline] + pub fn fetch_add( + &self, + with: $new_type_name, + order: ::core::sync::atomic::Ordering, + ) -> $new_type_name { + $new_type_name::from(self.container.fetch_add(with.into(), order)) } #[allow(dead_code)] - pub fn compare_exchange(&self, current: $new_type_name, new: $new_type_name, success: ::core::sync::atomic::Ordering, failure: ::core::sync::atomic::Ordering) -> ::core::result::Result<$new_type_name, $new_type_name> { - match self.container.compare_exchange(current.into(), new.into(), success, failure) { + #[inline] + pub fn compare_exchange( + &self, + current: $new_type_name, + new: $new_type_name, + success: ::core::sync::atomic::Ordering, + failure: ::core::sync::atomic::Ordering, + ) -> ::core::result::Result<$new_type_name, $new_type_name> { + match self + .container + .compare_exchange(current.into(), new.into(), success, failure) + { Ok(result) => Ok($new_type_name::from(result)), - Err(result) => Err($new_type_name::from(result)) + Err(result) => Err($new_type_name::from(result)), } } #[allow(dead_code)] - pub fn compare_exchange_weak(&self, current: $new_type_name, new: $new_type_name, success: ::core::sync::atomic::Ordering, failure: ::core::sync::atomic::Ordering) -> ::core::result::Result<$new_type_name, $new_type_name> { - match self.container.compare_exchange_weak(current.into(), new.into(), 
success, failure) { + #[inline] + pub fn compare_exchange_weak( + &self, + current: $new_type_name, + new: $new_type_name, + success: ::core::sync::atomic::Ordering, + failure: ::core::sync::atomic::Ordering, + ) -> ::core::result::Result<$new_type_name, $new_type_name> { + match self.container.compare_exchange_weak( + current.into(), + new.into(), + success, + failure, + ) { Ok(result) => Ok($new_type_name::from(result)), - Err(result) => Err($new_type_name::from(result)) + Err(result) => Err($new_type_name::from(result)), } } } - } + impl ::core::default::Default for $new_atomic_type_name { + #[inline] + fn default() -> Self { + Self::new($new_type_name::new(0)) + } + } + }; } #[test] fn test() { - use core::mem::size_of; use ::core::sync::atomic::AtomicUsize; + use core::mem::size_of; // Generate type `usize_like`. int_like!(UsizeLike, usize); assert_eq!(size_of::(), size_of::()); - // Generate types `usize_like` and `AtomicUsize`. int_like!(UsizeLike2, AtomicUsizeLike, usize, AtomicUsize); assert_eq!(size_of::(), size_of::()); assert_eq!(size_of::(), size_of::()); } - - diff --git a/src/common/mod.rs b/src/common/mod.rs index 4bdafba1..9f2a70f7 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -1,3 +1,4 @@ +pub mod aligned_box; #[macro_use] pub mod int_like; pub mod unique; diff --git a/src/common/unique.rs b/src/common/unique.rs index f426c525..49888e99 100644 --- a/src/common/unique.rs +++ b/src/common/unique.rs @@ -4,29 +4,26 @@ use core::{fmt, ptr::NonNull}; /// only correct if the pointer is never accessed from multiple /// locations across threads. Which is always, if the pointer is /// unique. -pub struct Unique(NonNull); +pub struct Unique(NonNull); -impl Copy for Unique {} -impl Clone for Unique { +impl Copy for Unique {} +impl Clone for Unique { fn clone(&self) -> Self { *self } } -unsafe impl Send for Unique {} -unsafe impl Sync for Unique {} +unsafe impl Send for Unique {} +unsafe impl Sync for Unique {} -impl Unique { - pub fn new(ptr: *mut T) -> Self { - Self(NonNull::new(ptr).expect("Did not expect pointer to be null")) - } +impl Unique { pub unsafe fn new_unchecked(ptr: *mut T) -> Self { Self(NonNull::new_unchecked(ptr)) } - pub fn as_ptr(&self) -> *mut T { + pub fn as_ptr(self) -> *mut T { self.0.as_ptr() } } -impl fmt::Debug for Unique { +impl fmt::Debug for Unique { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{:?}", self.0) } diff --git a/src/consts.rs b/src/consts.rs deleted file mode 100644 index 8f59b475..00000000 --- a/src/consts.rs +++ /dev/null @@ -1,93 +0,0 @@ -// Because the memory map is so important to not be aliased, it is defined here, in one place -// The lower 256 PML4 entries are reserved for userspace -// Each PML4 entry references up to 512 GB of memory -// The top (511) PML4 is reserved for recursive mapping -// The second from the top (510) PML4 is reserved for the kernel - /// The size of a single PML4 - pub const PML4_SIZE: usize = 0x0000_0080_0000_0000; - pub const PML4_MASK: usize = 0x0000_ff80_0000_0000; - - /// Offset of recursive paging - pub const RECURSIVE_PAGE_OFFSET: usize = (-(PML4_SIZE as isize)) as usize; - pub const RECURSIVE_PAGE_PML4: usize = (RECURSIVE_PAGE_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset of kernel - pub const KERNEL_OFFSET: usize = RECURSIVE_PAGE_OFFSET - PML4_SIZE; - pub const KERNEL_PML4: usize = (KERNEL_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to kernel heap - pub const KERNEL_HEAP_OFFSET: usize = KERNEL_OFFSET - PML4_SIZE; - pub const KERNEL_HEAP_PML4: usize = 
(KERNEL_HEAP_OFFSET & PML4_MASK)/PML4_SIZE; - /// Size of kernel heap - pub const KERNEL_HEAP_SIZE: usize = 1 * 1024 * 1024; // 1 MB - - /// Offset to kernel percpu variables - //TODO: Use 64-bit fs offset to enable this pub const KERNEL_PERCPU_OFFSET: usize = KERNEL_HEAP_OFFSET - PML4_SIZE; - pub const KERNEL_PERCPU_OFFSET: usize = 0xC000_0000; - /// Size of kernel percpu variables - pub const KERNEL_PERCPU_SIZE: usize = 64 * 1024; // 64 KB - - /// Offset to user image - pub const USER_OFFSET: usize = 0; - pub const USER_PML4: usize = (USER_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user TCB - /// Each process has 4096 bytes, at an offset of 4096 * PID - pub const USER_TCB_OFFSET: usize = 0xB000_0000; - - /// Offset to user arguments - pub const USER_ARG_OFFSET: usize = USER_OFFSET + PML4_SIZE/2; - - /// Offset to user heap - pub const USER_HEAP_OFFSET: usize = USER_OFFSET + PML4_SIZE; - pub const USER_HEAP_PML4: usize = (USER_HEAP_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user grants - pub const USER_GRANT_OFFSET: usize = USER_HEAP_OFFSET + PML4_SIZE; - pub const USER_GRANT_PML4: usize = (USER_GRANT_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user stack - pub const USER_STACK_OFFSET: usize = USER_GRANT_OFFSET + PML4_SIZE; - pub const USER_STACK_PML4: usize = (USER_STACK_OFFSET & PML4_MASK)/PML4_SIZE; - /// Size of user stack - pub const USER_STACK_SIZE: usize = 1024 * 1024; // 1 MB - - /// Offset to user sigstack - pub const USER_SIGSTACK_OFFSET: usize = USER_STACK_OFFSET + PML4_SIZE; - pub const USER_SIGSTACK_PML4: usize = (USER_SIGSTACK_OFFSET & PML4_MASK)/PML4_SIZE; - /// Size of user sigstack - pub const USER_SIGSTACK_SIZE: usize = 256 * 1024; // 256 KB - - /// Offset to user TLS - pub const USER_TLS_OFFSET: usize = USER_SIGSTACK_OFFSET + PML4_SIZE; - pub const USER_TLS_PML4: usize = (USER_TLS_OFFSET & PML4_MASK)/PML4_SIZE; - // Maximum TLS allocated to each PID, should be approximately 8 MB - pub const USER_TLS_SIZE: usize = PML4_SIZE / 65536; - - /// Offset to user temporary image (used when cloning) - pub const USER_TMP_OFFSET: usize = USER_TLS_OFFSET + PML4_SIZE; - pub const USER_TMP_PML4: usize = (USER_TMP_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user temporary heap (used when cloning) - pub const USER_TMP_HEAP_OFFSET: usize = USER_TMP_OFFSET + PML4_SIZE; - pub const USER_TMP_HEAP_PML4: usize = (USER_TMP_HEAP_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user temporary page for grants - pub const USER_TMP_GRANT_OFFSET: usize = USER_TMP_HEAP_OFFSET + PML4_SIZE; - pub const USER_TMP_GRANT_PML4: usize = (USER_TMP_GRANT_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user temporary stack (used when cloning) - pub const USER_TMP_STACK_OFFSET: usize = USER_TMP_GRANT_OFFSET + PML4_SIZE; - pub const USER_TMP_STACK_PML4: usize = (USER_TMP_STACK_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user temporary sigstack (used when cloning) - pub const USER_TMP_SIGSTACK_OFFSET: usize = USER_TMP_STACK_OFFSET + PML4_SIZE; - pub const USER_TMP_SIGSTACK_PML4: usize = (USER_TMP_SIGSTACK_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user temporary tls (used when cloning) - pub const USER_TMP_TLS_OFFSET: usize = USER_TMP_SIGSTACK_OFFSET + PML4_SIZE; - pub const USER_TMP_TLS_PML4: usize = (USER_TMP_TLS_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset for usage in other temporary pages - pub const USER_TMP_MISC_OFFSET: usize = USER_TMP_TLS_OFFSET + PML4_SIZE; - pub const USER_TMP_MISC_PML4: usize = (USER_TMP_MISC_OFFSET & PML4_MASK)/PML4_SIZE; diff --git 
a/src/context/arch/aarch64.rs b/src/context/arch/aarch64.rs new file mode 100644 index 00000000..ff90ca55 --- /dev/null +++ b/src/context/arch/aarch64.rs @@ -0,0 +1,394 @@ +use crate::{ + arch::{device::cpu::registers::control_regs, interrupt::InterruptStack, paging::PageMapper}, + context::{context::Kstack, memory::Table}, + percpu::PercpuBlock, + syscall::FloatRegisters, +}; +use core::{mem, mem::offset_of, ptr, sync::atomic::AtomicBool}; +use rmm::TableKind; +use spin::Once; +use syscall::{EnvRegisters, Error, Result, ENOMEM}; + +/// This must be used by the kernel to ensure that context switches are done atomically +/// Compare and exchange this to true when beginning a context switch on any CPU +/// The `Context::switch_to` function will set it back to false, allowing other CPU's to switch +/// This must be done, as no locks can be held on the stack during switch +pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false); + +// 512 bytes for registers, extra bytes for fpcr and fpsr +pub const KFX_ALIGN: usize = 16; + +#[derive(Clone, Debug)] +pub struct Context { + elr_el1: usize, + sp_el0: usize, + pub(crate) tpidr_el0: usize, /* Pointer to TLS region for this Context */ + pub(crate) tpidrro_el0: usize, /* Pointer to TLS (read-only) region for this Context */ + spsr_el1: usize, + esr_el1: usize, + fx_loadable: bool, + sp: usize, /* Stack Pointer (x31) */ + lr: usize, /* Link Register (x30) */ + fp: usize, /* Frame pointer Register (x29) */ + x28: usize, /* Callee saved Register */ + x27: usize, /* Callee saved Register */ + x26: usize, /* Callee saved Register */ + x25: usize, /* Callee saved Register */ + x24: usize, /* Callee saved Register */ + x23: usize, /* Callee saved Register */ + x22: usize, /* Callee saved Register */ + x21: usize, /* Callee saved Register */ + x20: usize, /* Callee saved Register */ + x19: usize, /* Callee saved Register */ +} + +impl Context { + pub fn new() -> Context { + Context { + elr_el1: 0, + sp_el0: 0, + tpidr_el0: 0, + tpidrro_el0: 0, + spsr_el1: 0, + esr_el1: 0, + fx_loadable: false, + sp: 0, + lr: 0, + fp: 0, + x28: 0, + x27: 0, + x26: 0, + x25: 0, + x24: 0, + x23: 0, + x22: 0, + x21: 0, + x20: 0, + x19: 0, + } + } + + fn set_stack(&mut self, address: usize) { + self.sp = address; + } + + fn set_x28(&mut self, x28: usize) { + self.x28 = x28; + } + + fn set_lr(&mut self, address: usize) { + self.lr = address; + } + + fn set_context_handle(&mut self) { + let address = self as *const _ as usize; + self.tpidrro_el0 = address; + } + + pub(crate) fn setup_initial_call( + &mut self, + stack: &Kstack, + func: extern "C" fn(), + userspace_allowed: bool, + ) { + let mut stack_top = stack.initial_top(); + + const INT_REGS_SIZE: usize = core::mem::size_of::(); + + if userspace_allowed { + unsafe { + // Zero-initialize InterruptStack registers. 
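// Sketch of the initial kernel stack being built below (highest address first):
//
//   kstack.initial_top()
//   [ InterruptStack, zero-initialized ]   <- only when userspace_allowed
//   stack_top                              <- saved as self.sp
//
// The first switch into this context "returns" through `lr`, which is pointed at
// `enter_usermode` below, while `x28` carries the actual entry function and
// `tpidrro_el0` a handle back to this Context.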
+ stack_top = stack_top.sub(INT_REGS_SIZE); + stack_top.write_bytes(0_u8, INT_REGS_SIZE); + (&mut *stack_top.cast::()).init(); + } + } + + self.set_lr(crate::interrupt::syscall::enter_usermode as usize); + self.set_x28(func as usize); + self.set_context_handle(); + + self.set_stack(stack_top as usize); + } + + #[allow(unused)] + pub fn dump(&self) { + println!("elr_el1: 0x{:016x}", self.elr_el1); + println!("sp_el0: 0x{:016x}", self.sp_el0); + println!("tpidr_el0: 0x{:016x}", self.tpidr_el0); + println!("tpidrro_el0: 0x{:016x}", self.tpidrro_el0); + println!("spsr_el1: 0x{:016x}", self.spsr_el1); + println!("esr_el1: 0x{:016x}", self.esr_el1); + println!("sp: 0x{:016x}", self.sp); + println!("lr: 0x{:016x}", self.lr); + println!("fp: 0x{:016x}", self.fp); + println!("x28: 0x{:016x}", self.x28); + println!("x27: 0x{:016x}", self.x27); + println!("x26: 0x{:016x}", self.x26); + println!("x25: 0x{:016x}", self.x25); + println!("x24: 0x{:016x}", self.x24); + println!("x23: 0x{:016x}", self.x23); + println!("x22: 0x{:016x}", self.x22); + println!("x21: 0x{:016x}", self.x21); + println!("x20: 0x{:016x}", self.x20); + println!("x19: 0x{:016x}", self.x19); + } +} + +impl super::Context { + pub fn get_fx_regs(&self) -> FloatRegisters { + if !self.arch.fx_loadable { + panic!("TODO: make get_fx_regs always work"); + } + + unsafe { ptr::read(self.kfx.as_ptr() as *const FloatRegisters) } + } + + pub fn set_fx_regs(&mut self, new: FloatRegisters) { + if !self.arch.fx_loadable { + panic!("TODO: make set_fx_regs always work"); + } + + unsafe { + ptr::write(self.kfx.as_mut_ptr() as *mut FloatRegisters, new); + } + } + pub fn current_syscall(&self) -> Option<[usize; 6]> { + if !self.inside_syscall { + return None; + } + let regs = self.regs()?; + let scratch = ®s.scratch; + Some([ + scratch.x8, scratch.x0, scratch.x1, scratch.x2, scratch.x3, scratch.x4, + ]) + } + + pub(crate) fn write_current_env_regs(&self, regs: EnvRegisters) -> Result<()> { + unsafe { + control_regs::tpidr_el0_write(regs.tpidr_el0 as u64); + control_regs::tpidrro_el0_write(regs.tpidrro_el0 as u64); + } + Ok(()) + } + + pub(crate) fn write_env_regs(&mut self, regs: EnvRegisters) -> Result<()> { + self.arch.tpidr_el0 = regs.tpidr_el0; + self.arch.tpidrro_el0 = regs.tpidrro_el0; + Ok(()) + } + + pub(crate) fn read_current_env_regs(&self) -> Result { + unsafe { + Ok(EnvRegisters { + tpidr_el0: control_regs::tpidr_el0() as usize, + tpidrro_el0: control_regs::tpidrro_el0() as usize, + }) + } + } + + pub(crate) fn read_env_regs(&self) -> Result { + Ok(EnvRegisters { + tpidr_el0: self.arch.tpidr_el0, + tpidrro_el0: self.arch.tpidrro_el0, + }) + } + pub fn set_userspace_io_allowed(&mut self, _allowed: bool) {} +} + +pub static EMPTY_CR3: Once = Once::new(); + +// SAFETY: EMPTY_CR3 must be initialized. 
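// `EMPTY_CR3` is a `spin::Once` holding the physical address of a page table that
// maps no user memory; it is what `set_addr_space(None)` installs when a context has
// no address space. A minimal sketch of the initialization, assuming it happens once
// during early paging setup (the actual call site is not shown in this diff, and
// `empty_table_physaddr` is a placeholder name):
//
//     EMPTY_CR3.call_once(|| empty_table_physaddr);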
+pub unsafe fn empty_cr3() -> rmm::PhysicalAddress { + debug_assert!(EMPTY_CR3.poll().is_some()); + *EMPTY_CR3.get_unchecked() +} + +#[target_feature(enable = "neon")] +unsafe extern "C" fn fp_save(float_regs: &mut FloatRegisters) { + core::arch::asm!( + "stp q0, q1, [{3}, {0} + 16 * 0]", + "stp q2, q3, [{3}, {0} + 16 * 2]", + "stp q4, q5, [{3}, {0} + 16 * 4]", + "stp q6, q7, [{3}, {0} + 16 * 6]", + "stp q8, q9, [{3}, {0} + 16 * 8]", + "stp q10, q11, [{3}, {0} + 16 * 10]", + "stp q12, q13, [{3}, {0} + 16 * 12]", + "stp q14, q15, [{3}, {0} + 16 * 14]", + "stp q16, q17, [{3}, {0} + 16 * 16]", + "stp q18, q19, [{3}, {0} + 16 * 18]", + "stp q20, q21, [{3}, {0} + 16 * 20]", + "stp q22, q23, [{3}, {0} + 16 * 22]", + "stp q24, q25, [{3}, {0} + 16 * 24]", + "stp q26, q27, [{3}, {0} + 16 * 26]", + "stp q28, q29, [{3}, {0} + 16 * 28]", + "stp q30, q31, [{3}, {0} + 16 * 30]", + "mrs x9, fpcr", + "add {3}, {3}, {1}", + "str x9, [{3}]", + "mrs x9, fpsr", + "str x9, [{3}, {2} - {1}]", + const mem::offset_of!(FloatRegisters, fp_simd_regs), + const mem::offset_of!(FloatRegisters, fpcr), + const mem::offset_of!(FloatRegisters, fpsr), + inout(reg) float_regs => _, + ); +} + +#[target_feature(enable = "neon")] +unsafe extern "C" fn fp_load(float_regs: &mut FloatRegisters) { + core::arch::asm!( + "ldp q0, q1, [{3}, {0} + 16 * 0]", + "ldp q2, q3, [{3}, {0} + 16 * 2]", + "ldp q4, q5, [{3}, {0} + 16 * 4]", + "ldp q6, q7, [{3}, {0} + 16 * 6]", + "ldp q8, q9, [{3}, {0} + 16 * 8]", + "ldp q10, q11, [{3}, {0} + 16 * 10]", + "ldp q12, q13, [{3}, {0} + 16 * 12]", + "ldp q14, q15, [{3}, {0} + 16 * 14]", + "ldp q16, q17, [{3}, {0} + 16 * 16]", + "ldp q18, q19, [{3}, {0} + 16 * 18]", + "ldp q20, q21, [{3}, {0} + 16 * 20]", + "ldp q22, q23, [{3}, {0} + 16 * 22]", + "ldp q24, q25, [{3}, {0} + 16 * 24]", + "ldp q26, q27, [{3}, {0} + 16 * 26]", + "ldp q28, q29, [{3}, {0} + 16 * 28]", + "ldp q30, q31, [{3}, {0} + 16 * 30]", + "add {3}, {3}, {1}", + "ldr x9, [{3}]", + "msr fpcr, x9", + "ldr x9, [{3}, {2} - {1}]", + "msr fpsr, x9", + const mem::offset_of!(FloatRegisters, fp_simd_regs), + const mem::offset_of!(FloatRegisters, fpcr), + const mem::offset_of!(FloatRegisters, fpsr), + inout(reg) float_regs => _, + ); +} + +pub unsafe fn switch_to(prev: &mut super::Context, next: &mut super::Context) { + fp_save(&mut *(prev.kfx.as_mut_ptr() as *mut FloatRegisters)); + + prev.arch.fx_loadable = true; + + if next.arch.fx_loadable { + fp_load(&mut *(next.kfx.as_mut_ptr() as *mut FloatRegisters)); + } + + PercpuBlock::current() + .new_addrsp_tmp + .set(next.addr_space.clone()); + + switch_to_inner(&mut prev.arch, &mut next.arch) +} + +#[naked] +unsafe extern "C" fn switch_to_inner(_prev: &mut Context, _next: &mut Context) { + core::arch::naked_asm!( + " + str x19, [x0, #{off_x19}] + ldr x19, [x1, #{off_x19}] + + str x20, [x0, #{off_x20}] + ldr x20, [x1, #{off_x20}] + + str x21, [x0, #{off_x21}] + ldr x21, [x1, #{off_x21}] + + str x22, [x0, #{off_x22}] + ldr x22, [x1, #{off_x22}] + + str x23, [x0, #{off_x23}] + ldr x23, [x1, #{off_x23}] + + str x24, [x0, #{off_x24}] + ldr x24, [x1, #{off_x24}] + + str x25, [x0, #{off_x25}] + ldr x25, [x1, #{off_x25}] + + str x26, [x0, #{off_x26}] + ldr x26, [x1, #{off_x26}] + + str x27, [x0, #{off_x27}] + ldr x27, [x1, #{off_x27}] + + str x28, [x0, #{off_x28}] + ldr x28, [x1, #{off_x28}] + + str x29, [x0, #{off_x29}] + ldr x29, [x1, #{off_x29}] + + str x30, [x0, #{off_x30}] + ldr x30, [x1, #{off_x30}] + + mrs x2, elr_el1 + str x2, [x0, #{off_elr_el1}] + ldr x2, [x1, #{off_elr_el1}] + msr elr_el1, x2 + + 
mrs x2, sp_el0 + str x2, [x0, #{off_sp_el0}] + ldr x2, [x1, #{off_sp_el0}] + msr sp_el0, x2 + + mrs x2, tpidr_el0 + str x2, [x0, #{off_tpidr_el0}] + ldr x2, [x1, #{off_tpidr_el0}] + msr tpidr_el0, x2 + + mrs x2, tpidrro_el0 + str x2, [x0, #{off_tpidrro_el0}] + ldr x2, [x1, #{off_tpidrro_el0}] + msr tpidrro_el0, x2 + + mrs x2, spsr_el1 + str x2, [x0, #{off_spsr_el1}] + ldr x2, [x1, #{off_spsr_el1}] + msr spsr_el1, x2 + + mrs x2, esr_el1 + str x2, [x0, #{off_esr_el1}] + ldr x2, [x1, #{off_esr_el1}] + msr esr_el1, x2 + + mov x2, sp + str x2, [x0, #{off_sp}] + ldr x2, [x1, #{off_sp}] + mov sp, x2 + + b {switch_hook} + ", + off_x19 = const(offset_of!(Context, x19)), + off_x20 = const(offset_of!(Context, x20)), + off_x21 = const(offset_of!(Context, x21)), + off_x22 = const(offset_of!(Context, x22)), + off_x23 = const(offset_of!(Context, x23)), + off_x24 = const(offset_of!(Context, x24)), + off_x25 = const(offset_of!(Context, x25)), + off_x26 = const(offset_of!(Context, x26)), + off_x27 = const(offset_of!(Context, x27)), + off_x28 = const(offset_of!(Context, x28)), + off_x29 = const(offset_of!(Context, fp)), + off_x30 = const(offset_of!(Context, lr)), + off_elr_el1 = const(offset_of!(Context, elr_el1)), + off_sp_el0 = const(offset_of!(Context, sp_el0)), + off_tpidr_el0 = const(offset_of!(Context, tpidr_el0)), + off_tpidrro_el0 = const(offset_of!(Context, tpidrro_el0)), + off_spsr_el1 = const(offset_of!(Context, spsr_el1)), + off_esr_el1 = const(offset_of!(Context, esr_el1)), + off_sp = const(offset_of!(Context, sp)), + + switch_hook = sym crate::context::switch_finish_hook, + ); +} + +/// Allocates a new empty utable +pub fn setup_new_utable() -> Result { + let utable = unsafe { + PageMapper::create(TableKind::User, crate::memory::TheFrameAllocator) + .ok_or(Error::new(ENOMEM))? + }; + + Ok(Table { utable }) +} diff --git a/src/context/arch/riscv64.rs b/src/context/arch/riscv64.rs new file mode 100644 index 00000000..1f484811 --- /dev/null +++ b/src/context/arch/riscv64.rs @@ -0,0 +1,244 @@ +use crate::{ + arch::{ + interrupt::InterruptStack, + paging::{PageMapper, ENTRY_COUNT}, + }, + context::{context::Kstack, memory::Table}, + memory::{KernelMapper, RmmA}, + percpu::PercpuBlock, + syscall::FloatRegisters, +}; +use core::{mem::offset_of, sync::atomic::AtomicBool}; +use rmm::{Arch, TableKind, VirtualAddress}; +use spin::Once; +use syscall::{error::*, EnvRegisters}; + +pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false); + +pub const KFX_ALIGN: usize = 16; + +#[derive(Clone, Debug, Default)] +pub struct Context { + sp: usize, + ra: usize, + fp: usize, + s1: usize, + s2: usize, + s3: usize, + s4: usize, + s5: usize, + s6: usize, + s7: usize, + s8: usize, + s9: usize, + s10: usize, + s11: usize, + sstatus: usize, +} + +impl Context { + pub fn new() -> Self { + Self::default() + } + + fn set_stack(&mut self, address: usize) { + self.sp = address; + } + + fn set_ra(&mut self, address: usize) { + self.ra = address; + } + + fn set_s11(&mut self, address: usize) { + self.s11 = address; + } + + pub(crate) fn setup_initial_call( + &mut self, + stack: &Kstack, + func: extern "C" fn(), + userspace_allowed: bool, + ) { + let mut stack_top = stack.initial_top(); + + const INT_REGS_SIZE: usize = core::mem::size_of::(); + + if userspace_allowed { + unsafe { + // Zero-initialize InterruptStack registers. 
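// Same scheme as the aarch64 variant above: `ra` is pointed at the usermode
// trampoline and `s11` at the entry function (see `set_ra`/`set_s11` below).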
+ stack_top = stack_top.sub(INT_REGS_SIZE); + stack_top.write_bytes(0_u8, INT_REGS_SIZE); + (&mut *stack_top.cast::()).init(); + } + } + + self.set_ra(crate::interrupt::syscall::enter_usermode as usize); + self.set_s11(func as usize); + + self.set_stack(stack_top as usize); + } +} + +impl super::Context { + pub fn get_fx_regs(&self) -> FloatRegisters { + unimplemented!() + } + + pub fn set_fx_regs(&mut self, mut _new: FloatRegisters) { + unimplemented!() + } + + pub fn current_syscall(&self) -> Option<[usize; 6]> { + if !self.inside_syscall { + return None; + } + let regs = self.regs()?; + let regs = ®s.registers; + Some([regs.x17, regs.x10, regs.x11, regs.x12, regs.x13, regs.x14]) + } + + pub(crate) fn write_current_env_regs(&mut self, regs: EnvRegisters) -> Result<()> { + self.write_env_regs(regs) + } + + pub(crate) fn write_env_regs(&mut self, regs: EnvRegisters) -> Result<()> { + if RmmA::virt_is_valid(VirtualAddress::new(regs.tp)) { + match self.regs_mut() { + Some(stack) => { + stack.registers.x4 = regs.tp; + Ok(()) + } + None => Err(Error::new(ESRCH)), + } + } else { + Err(Error::new(EINVAL)) + } + } + + pub(crate) fn read_current_env_regs(&self) -> Result { + self.read_env_regs() + } + + pub(crate) fn read_env_regs(&self) -> Result { + match self.regs() { + Some(stack) => Ok(EnvRegisters { + tp: stack.registers.x4, + }), + None => Err(Error::new(ESRCH)), + } + } + pub fn set_userspace_io_allowed(&mut self, _allowed: bool) {} +} + +pub static EMPTY_CR3: Once = Once::new(); + +// SAFETY: EMPTY_CR3 must be initialized. +pub unsafe fn empty_cr3() -> rmm::PhysicalAddress { + debug_assert!(EMPTY_CR3.poll().is_some()); + *EMPTY_CR3.get_unchecked() +} + +/// Switch to the next context by restoring its stack and registers +pub unsafe fn switch_to(prev: &mut super::Context, next: &mut super::Context) { + // FIXME floating point + PercpuBlock::current() + .new_addrsp_tmp + .set(next.addr_space.clone()); + + switch_to_inner(&mut prev.arch, &mut next.arch); +} + +#[naked] +unsafe extern "C" fn switch_to_inner(prev: &mut Context, next: &mut Context) { + core::arch::naked_asm!(r#" + sd s1, {off_s1}(a0) + ld s1, {off_s1}(a1) + + sd s2, {off_s2}(a0) + ld s2, {off_s2}(a1) + + sd s3, {off_s3}(a0) + ld s3, {off_s3}(a1) + + sd s4, {off_s4}(a0) + ld s4, {off_s4}(a1) + + sd s5, {off_s5}(a0) + ld s5, {off_s5}(a1) + + sd s6, {off_s6}(a0) + ld s6, {off_s6}(a1) + + sd s7, {off_s7}(a0) + ld s7, {off_s7}(a1) + + sd s8, {off_s8}(a0) + ld s8, {off_s8}(a1) + + sd s9, {off_s9}(a0) + ld s9, {off_s9}(a1) + + sd s10, {off_s10}(a0) + ld s10, {off_s10}(a1) + + sd s11, {off_s11}(a0) + ld s11, {off_s11}(a1) + + sd s11, {off_s11}(a0) + ld s11, {off_s11}(a1) + + sd sp, {off_sp}(a0) + ld sp, {off_sp}(a1) + + sd ra, {off_ra}(a0) + ld ra, {off_ra}(a1) + + sd fp, {off_fp}(a0) + ld fp, {off_fp}(a1) + + csrr t0, sstatus + sd t0, {off_sstatus}(a0) + ld t0, {off_sstatus}(a1) + csrw sstatus, t0 + + j {switch_hook} + "#, + off_s1 = const(offset_of!(Context, s1)), + off_s2 = const(offset_of!(Context, s2)), + off_s3 = const(offset_of!(Context, s3)), + off_s4 = const(offset_of!(Context, s4)), + off_s5 = const(offset_of!(Context, s5)), + off_s6 = const(offset_of!(Context, s6)), + off_s7 = const(offset_of!(Context, s7)), + off_s8 = const(offset_of!(Context, s8)), + off_s9 = const(offset_of!(Context, s9)), + off_s10 = const(offset_of!(Context, s10)), + off_s11 = const(offset_of!(Context, s11)), + off_sp = const(offset_of!(Context, sp)), + off_ra = const(offset_of!(Context, ra)), + off_fp = const(offset_of!(Context, fp)), + off_sstatus = 
const(offset_of!(Context, sstatus)), + + switch_hook = sym crate::context::switch_finish_hook, + ); +} + +/// Allocates a new empty utable +pub fn setup_new_utable() -> Result<Table>
{ + let utable = unsafe { + PageMapper::create(TableKind::User, crate::memory::TheFrameAllocator) + .ok_or(Error::new(ENOMEM))? + }; + + // Copy higher half (kernel) mappings + unsafe { + let active_ktable = KernelMapper::lock(); + for pde_no in ENTRY_COUNT / 2..ENTRY_COUNT { + if let Some(entry) = active_ktable.table().entry(pde_no) { + utable.table().set_entry(pde_no, entry); + } + } + } + + Ok(Table { utable }) +} diff --git a/src/context/arch/x86.rs b/src/context/arch/x86.rs new file mode 100644 index 00000000..4a98d9eb --- /dev/null +++ b/src/context/arch/x86.rs @@ -0,0 +1,340 @@ +use core::sync::atomic::AtomicBool; + +use crate::{ + gdt::{pcr, GDT_USER_FS, GDT_USER_GS}, + percpu::PercpuBlock, + syscall::FloatRegisters, +}; + +use crate::{ + arch::{interrupt::InterruptStack, paging::PageMapper}, + context::{context::Kstack, memory::Table}, + memory::RmmA, +}; +use core::mem::offset_of; +use rmm::{Arch, TableKind, VirtualAddress}; +use spin::Once; +use syscall::{error::*, EnvRegisters}; + +/// This must be used by the kernel to ensure that context switches are done atomically +/// Compare and exchange this to true when beginning a context switch on any CPU +/// The `Context::switch_to` function will set it back to false, allowing other CPU's to switch +/// This must be done, as no locks can be held on the stack during switch +pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false); + +const ST_RESERVED: u128 = 0xFFFF_FFFF_FFFF_0000_0000_0000_0000_0000; + +pub const KFX_ALIGN: usize = 16; + +#[derive(Clone, Debug)] +#[repr(C)] +pub struct Context { + /// EFLAGS register + eflags: usize, + /// EBX register + ebx: usize, + /// EDI register + edi: usize, + /// ESI register + esi: usize, + /// Base pointer + ebp: usize, + /// Stack pointer + pub(crate) esp: usize, + /// FSBASE. + /// + /// NOTE: Same fsgsbase behavior as with gsbase. + pub(crate) fsbase: usize, + /// GSBASE. + /// + /// NOTE: Without fsgsbase, this register will strictly be equal to the register value when + /// running. With fsgsbase, this is neither saved nor restored upon every syscall (there is no + /// need to!), and thus it must be re-read from the register before copying this struct. + pub(crate) gsbase: usize, + userspace_io_allowed: bool, +} + +impl Context { + pub fn new() -> Context { + Context { + eflags: 0, + ebx: 0, + edi: 0, + esi: 0, + ebp: 0, + esp: 0, + fsbase: 0, + gsbase: 0, + userspace_io_allowed: false, + } + } + + fn set_stack(&mut self, address: usize) { + self.esp = address; + } + + pub(crate) fn setup_initial_call( + &mut self, + stack: &Kstack, + func: extern "C" fn(), + userspace_allowed: bool, + ) { + let mut stack_top = stack.initial_top(); + + const INT_REGS_SIZE: usize = core::mem::size_of::(); + + unsafe { + if userspace_allowed { + // Zero-initialize InterruptStack registers. 
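// Initial stack layout built below, from the top of the kernel stack downwards:
//
//   [ InterruptStack, zeroed ]   only when userspace_allowed
//   [ usize: enter_usermode  ]   only when userspace_allowed
//   [ usize: func            ]   always; this is the new stack top
//
// Both addresses are placed on the stack as return addresses: the first `ret`
// after the switch lands in `func`, and `func`'s own return lands in
// `enter_usermode`.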
+ stack_top = stack_top.sub(INT_REGS_SIZE); + stack_top.write_bytes(0_u8, INT_REGS_SIZE); + (&mut *stack_top.cast::()).init(); + + stack_top = stack_top.sub(core::mem::size_of::()); + stack_top + .cast::() + .write(crate::interrupt::syscall::enter_usermode as usize); + } + + stack_top = stack_top.sub(core::mem::size_of::()); + stack_top.cast::().write(func as usize); + } + + self.set_stack(stack_top as usize); + } +} + +impl super::Context { + pub fn get_fx_regs(&self) -> FloatRegisters { + let mut regs = unsafe { self.kfx.as_ptr().cast::().read() }; + regs._reserved = 0; + let mut new_st = regs.st_space; + for st in &mut new_st { + // Only allow access to the 80 lowest bits + *st &= !ST_RESERVED; + } + regs.st_space = new_st; + regs + } + + pub fn set_fx_regs(&mut self, mut new: FloatRegisters) { + { + let old = unsafe { &*(self.kfx.as_ptr().cast::()) }; + new._reserved = old._reserved; + let old_st = new.st_space; + let mut new_st = new.st_space; + for (new_st, old_st) in new_st.iter_mut().zip(&old_st) { + *new_st &= !ST_RESERVED; + *new_st |= old_st & ST_RESERVED; + } + new.st_space = new_st; + + // Make sure we don't use `old` from now on + } + + unsafe { + self.kfx.as_mut_ptr().cast::().write(new); + } + } + pub fn set_userspace_io_allowed(&mut self, allowed: bool) { + self.arch.userspace_io_allowed = allowed; + + if self.is_current_context() { + unsafe { + crate::gdt::set_userspace_io_allowed(allowed); + } + } + } + pub fn current_syscall(&self) -> Option<[usize; 6]> { + if !self.inside_syscall { + return None; + } + let regs = self.regs()?; + Some([ + regs.scratch.eax, + regs.preserved.ebx, + regs.scratch.ecx, + regs.scratch.edx, + regs.preserved.esi, + regs.preserved.edi, + ]) + } + + pub(crate) fn write_current_env_regs(&mut self, regs: EnvRegisters) -> Result<()> { + if RmmA::virt_is_valid(VirtualAddress::new(regs.fsbase as usize)) + && RmmA::virt_is_valid(VirtualAddress::new(regs.gsbase as usize)) + { + unsafe { + (&mut *pcr()).gdt[GDT_USER_FS].set_offset(regs.fsbase); + (&mut *pcr()).gdt[GDT_USER_GS].set_offset(regs.gsbase); + } + self.arch.fsbase = regs.fsbase as usize; + self.arch.gsbase = regs.gsbase as usize; + Ok(()) + } else { + Err(Error::new(EINVAL)) + } + } + + pub(crate) fn write_env_regs(&mut self, regs: EnvRegisters) -> Result<()> { + if RmmA::virt_is_valid(VirtualAddress::new(regs.fsbase as usize)) + && RmmA::virt_is_valid(VirtualAddress::new(regs.gsbase as usize)) + { + self.arch.fsbase = regs.fsbase as usize; + self.arch.gsbase = regs.gsbase as usize; + Ok(()) + } else { + Err(Error::new(EINVAL)) + } + } + + pub(crate) fn read_current_env_regs(&self) -> Result { + unsafe { + Ok(EnvRegisters { + fsbase: (&*pcr()).gdt[GDT_USER_FS].offset(), + gsbase: (&*pcr()).gdt[GDT_USER_GS].offset(), + }) + } + } + + pub(crate) fn read_env_regs(&self) -> Result { + Ok(EnvRegisters { + fsbase: self.arch.fsbase as u32, + gsbase: self.arch.gsbase as u32, + }) + } +} + +pub static EMPTY_CR3: Once = Once::new(); + +// SAFETY: EMPTY_CR3 must be initialized. 
+pub unsafe fn empty_cr3() -> rmm::PhysicalAddress { + debug_assert!(EMPTY_CR3.poll().is_some()); + *EMPTY_CR3.get_unchecked() +} + +/// Switch to the next context by restoring its stack and registers +pub unsafe fn switch_to(prev: &mut super::Context, next: &mut super::Context) { + if let Some(ref stack) = next.kstack { + crate::gdt::set_tss_stack(stack.initial_top() as usize); + } + crate::gdt::set_userspace_io_allowed(next.arch.userspace_io_allowed); + + core::arch::asm!(" + fxsave [{prev_fx}] + fxrstor [{next_fx}] + ", prev_fx = in(reg) prev.kfx.as_mut_ptr(), + next_fx = in(reg) next.kfx.as_ptr(), + ); + + { + let gdt = &mut (&mut *pcr()).gdt; + + prev.arch.fsbase = gdt[GDT_USER_FS].offset() as usize; + gdt[GDT_USER_FS].set_offset(next.arch.fsbase as u32); + prev.arch.gsbase = gdt[GDT_USER_GS].offset() as usize; + gdt[GDT_USER_GS].set_offset(next.arch.gsbase as u32); + } + PercpuBlock::current() + .new_addrsp_tmp + .set(next.addr_space.clone()); + + core::arch::asm!( + "call {inner}", + inner = sym switch_to_inner, + in("ecx") &mut prev.arch, + in("edx") &mut next.arch, + ); +} + +// Check disassembly! +#[naked] +unsafe extern "cdecl" fn switch_to_inner() { + use Context as Cx; + + core::arch::naked_asm!( + // As a quick reminder for those who are unfamiliar with the System V ABI (extern "C"): + // + // - the current parameters are passed in the registers `edi`, `esi`, + // - we can modify scratch registers, e.g. rax + // - we cannot change callee-preserved registers arbitrarily, e.g. ebx, which is why we + // store them here in the first place. + concat!(" + // ecx is prev, edx is next + + // Save old registers, and load new ones + mov [ecx + {off_ebx}], ebx + mov ebx, [edx + {off_ebx}] + + mov [ecx + {off_edi}], edi + mov edi, [edx + {off_edi}] + + mov [ecx + {off_esi}], esi + mov esi, [edx + {off_esi}] + + mov [ecx + {off_ebp}], ebp + mov ebp, [edx + {off_ebp}] + + mov [ecx + {off_esp}], esp + mov esp, [edx + {off_esp}] + + // push EFLAGS (can only be modified via stack) + pushfd + // pop EFLAGS into `self.eflags` + pop DWORD PTR [ecx + {off_eflags}] + + // push `next.eflags` + push DWORD PTR [edx + {off_eflags}] + // pop into EFLAGS + popfd + + // When we return, we cannot even guarantee that the return address on the stack, points to + // the calling function, `context::switch`. Thus, we have to execute this Rust hook by + // ourselves, which will unlock the contexts before the later switch. + + // Note that switch_finish_hook will be responsible for executing `ret`. + jmp {switch_hook} + + "), + + off_eflags = const(offset_of!(Cx, eflags)), + + off_ebx = const(offset_of!(Cx, ebx)), + off_edi = const(offset_of!(Cx, edi)), + off_esi = const(offset_of!(Cx, esi)), + off_ebp = const(offset_of!(Cx, ebp)), + off_esp = const(offset_of!(Cx, esp)), + + switch_hook = sym crate::context::switch_finish_hook, + ); +} + +/// Allocates a new identically mapped ktable and empty utable (same memory on x86) +pub fn setup_new_utable() -> Result
{ + use crate::memory::KernelMapper; + + let utable = unsafe { + PageMapper::create(TableKind::User, crate::memory::TheFrameAllocator) + .ok_or(Error::new(ENOMEM))? + }; + + { + let active_ktable = KernelMapper::lock(); + + let copy_mapping = |p4_no| unsafe { + let entry = active_ktable + .table() + .entry(p4_no) + .unwrap_or_else(|| panic!("expected kernel PML {} to be mapped", p4_no)); + + utable.table().set_entry(p4_no, entry) + }; + + // Copy higher half (kernel) mappings + for i in 512..1024 { + copy_mapping(i); + } + } + + Ok(Table { utable }) +} diff --git a/src/context/arch/x86_64.rs b/src/context/arch/x86_64.rs index 25b1925a..21cecac4 100644 --- a/src/context/arch/x86_64.rs +++ b/src/context/arch/x86_64.rs @@ -1,6 +1,20 @@ -use core::mem; -use core::sync::atomic::AtomicBool; -use syscall::data::FloatRegisters; +use core::{ + ptr::{addr_of, addr_of_mut}, + sync::atomic::AtomicBool, +}; + +use crate::syscall::FloatRegisters; + +use crate::{ + arch::{interrupt::InterruptStack, paging::PageMapper}, + context::{context::Kstack, memory::Table}, + memory::RmmA, +}; +use core::mem::offset_of; +use rmm::{Arch, TableKind, VirtualAddress}; +use spin::Once; +use syscall::{error::*, EnvRegisters}; +use x86::msr; /// This must be used by the kernel to ensure that context switches are done atomically /// Compare and exchange this to true when beginning a context switch on any CPU @@ -10,14 +24,17 @@ pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false); const ST_RESERVED: u128 = 0xFFFF_FFFF_FFFF_0000_0000_0000_0000_0000; +#[cfg(cpu_feature_never = "xsave")] +pub const KFX_ALIGN: usize = 16; + +#[cfg(not(cpu_feature_never = "xsave"))] +pub const KFX_ALIGN: usize = 64; + +// TODO: stack guarding? + #[derive(Clone, Debug)] +#[repr(C)] pub struct Context { - /// FX valid? - loadable: bool, - /// FX location - fx: usize, - /// Page table pointer - cr3: usize, /// RFLAGS register rflags: usize, /// RBX register @@ -33,15 +50,23 @@ pub struct Context { /// Base pointer rbp: usize, /// Stack pointer - rsp: usize + pub(crate) rsp: usize, + /// FSBASE. + /// + /// NOTE: Same fsgsbase behavior as with gsbase. + pub(crate) fsbase: usize, + /// GSBASE. + /// + /// NOTE: Without fsgsbase, this register will strictly be equal to the register value when + /// running. With fsgsbase, this is neither saved nor restored upon every syscall (there is no + /// need to!), and thus it must be re-read from the register before copying this struct. + pub(crate) gsbase: usize, + userspace_io_allowed: bool, } impl Context { pub fn new() -> Context { Context { - loadable: false, - fx: 0, - cr3: 0, rflags: 0, rbx: 0, r12: 0, @@ -49,19 +74,50 @@ impl Context { r14: 0, r15: 0, rbp: 0, - rsp: 0 + rsp: 0, + fsbase: 0, + gsbase: 0, + userspace_io_allowed: false, } } - pub fn get_page_table(&self) -> usize { - self.cr3 + fn set_stack(&mut self, address: usize) { + self.rsp = address; } - pub fn get_fx_regs(&self) -> Option { - if !self.loadable { - return None; + pub(crate) fn setup_initial_call( + &mut self, + stack: &Kstack, + func: extern "C" fn(), + userspace_allowed: bool, + ) { + let mut stack_top = stack.initial_top(); + + const INT_REGS_SIZE: usize = core::mem::size_of::(); + + unsafe { + if userspace_allowed { + // Zero-initialize InterruptStack registers. 
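// Same stack layout as the 32-bit x86 variant above, with 8-byte slots for the
// two return addresses (`enter_usermode`, then `func` at the new stack top).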
+ stack_top = stack_top.sub(INT_REGS_SIZE); + stack_top.write_bytes(0_u8, INT_REGS_SIZE); + (&mut *stack_top.cast::()).init(); + + stack_top = stack_top.sub(core::mem::size_of::()); + stack_top + .cast::() + .write(crate::interrupt::syscall::enter_usermode as usize); + } + + stack_top = stack_top.sub(core::mem::size_of::()); + stack_top.cast::().write(func as usize); } - let mut regs = unsafe { *(self.fx as *const FloatRegisters) }; + + self.set_stack(stack_top as usize); + } +} +impl super::Context { + pub fn get_fx_regs(&self) -> FloatRegisters { + let mut regs = unsafe { self.kfx.as_ptr().cast::().read() }; regs._reserved = 0; let mut new_st = regs.st_space; for st in &mut new_st { @@ -69,158 +125,305 @@ impl Context { *st &= !ST_RESERVED; } regs.st_space = new_st; - Some(regs) + regs } - pub fn set_fx_regs(&mut self, mut new: FloatRegisters) -> bool { - if !self.loadable { - return false; - } - let old = unsafe { &*(self.fx as *const FloatRegisters) }; - new._reserved = old._reserved; - let old_st = new.st_space; - let mut new_st = new.st_space; - for (new_st, old_st) in new_st.iter_mut().zip(&old_st) { - *new_st &= !ST_RESERVED; - *new_st |= old_st & ST_RESERVED; - } - new.st_space = new_st; + pub fn set_fx_regs(&mut self, mut new: FloatRegisters) { + { + let old = unsafe { &*(self.kfx.as_ptr().cast::()) }; + new._reserved = old._reserved; + let old_st = new.st_space; + let mut new_st = new.st_space; + for (new_st, old_st) in new_st.iter_mut().zip(&old_st) { + *new_st &= !ST_RESERVED; + *new_st |= old_st & ST_RESERVED; + } + new.st_space = new_st; - // Make sure we don't use `old` from now on - drop(old); + // Make sure we don't use `old` from now on + } unsafe { - *(self.fx as *mut FloatRegisters) = new; + self.kfx.as_mut_ptr().cast::().write(new); } - true } - pub fn set_fx(&mut self, address: usize) { - self.fx = address; - } + pub fn set_userspace_io_allowed(&mut self, allowed: bool) { + self.arch.userspace_io_allowed = allowed; - pub fn set_page_table(&mut self, address: usize) { - self.cr3 = address; + if self.is_current_context() { + unsafe { + crate::gdt::set_userspace_io_allowed(crate::gdt::pcr(), allowed); + } + } } - pub fn set_stack(&mut self, address: usize) { - self.rsp = address; + pub(crate) fn current_syscall(&self) -> Option<[usize; 6]> { + if !self.inside_syscall { + return None; + } + let regs = self.regs()?; + let scratch = ®s.scratch; + Some([ + scratch.rax, + scratch.rdi, + scratch.rsi, + scratch.rdx, + scratch.r10, + scratch.r8, + ]) } - pub unsafe fn signal_stack(&mut self, handler: extern fn(usize), sig: u8) { - self.push_stack(sig as usize); - self.push_stack(handler as usize); - self.push_stack(signal_handler_wrapper as usize); + pub(crate) fn read_current_env_regs(&self) -> Result { + // TODO: Avoid rdmsr if fsgsbase is not enabled, if this is worth optimizing for. 
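// While executing in the kernel, SWAPGS has already run, so the *user* GSBASE
// lives in IA32_KERNEL_GSBASE; reading that MSR (rather than IA32_GS_BASE)
// yields the value userspace will observe. The same reasoning applies to
// `write_current_env_regs` below.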
+ unsafe { + Ok(EnvRegisters { + fsbase: msr::rdmsr(msr::IA32_FS_BASE), + gsbase: msr::rdmsr(msr::IA32_KERNEL_GSBASE), + }) + } } - pub unsafe fn push_stack(&mut self, value: usize) { - self.rsp -= mem::size_of::(); - *(self.rsp as *mut usize) = value; + pub(crate) fn read_env_regs(&self) -> Result { + Ok(EnvRegisters { + fsbase: self.arch.fsbase as u64, + gsbase: self.arch.gsbase as u64, + }) } - pub unsafe fn pop_stack(&mut self) -> usize { - let value = *(self.rsp as *const usize); - self.rsp += mem::size_of::(); - value - } + pub(crate) fn write_current_env_regs(&mut self, regs: EnvRegisters) -> Result<()> { + if RmmA::virt_is_valid(VirtualAddress::new(regs.fsbase as usize)) + && RmmA::virt_is_valid(VirtualAddress::new(regs.gsbase as usize)) + { + unsafe { + x86::msr::wrmsr(x86::msr::IA32_FS_BASE, regs.fsbase as u64); + // We have to write to KERNEL_GSBASE, because when the kernel returns to + // userspace, it will have executed SWAPGS first. + x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, regs.gsbase as u64); + } + self.arch.fsbase = regs.fsbase as usize; + self.arch.gsbase = regs.gsbase as usize; - /// Switch to the next context by restoring its stack and registers - #[cold] - #[inline(never)] - #[naked] - pub unsafe fn switch_to(&mut self, next: &mut Context) { - asm!("fxsave64 [$0]" : : "r"(self.fx) : "memory" : "intel", "volatile"); - self.loadable = true; - if next.loadable { - asm!("fxrstor64 [$0]" : : "r"(next.fx) : "memory" : "intel", "volatile"); - }else{ - asm!("fninit" : : : "memory" : "intel", "volatile"); + Ok(()) + } else { + Err(Error::new(EINVAL)) } + } - asm!("mov $0, cr3" : "=r"(self.cr3) : : "memory" : "intel", "volatile"); - if next.cr3 != self.cr3 { - asm!("mov cr3, $0" : : "r"(next.cr3) : "memory" : "intel", "volatile"); + pub(crate) fn write_env_regs(&mut self, regs: EnvRegisters) -> Result<()> { + if RmmA::virt_is_valid(VirtualAddress::new(regs.fsbase as usize)) + && RmmA::virt_is_valid(VirtualAddress::new(regs.gsbase as usize)) + { + self.arch.fsbase = regs.fsbase as usize; + self.arch.gsbase = regs.gsbase as usize; + Ok(()) + } else { + Err(Error::new(EINVAL)) } + } +} + +pub static EMPTY_CR3: Once = Once::new(); - asm!("pushfq ; pop $0" : "=r"(self.rflags) : : "memory" : "intel", "volatile"); - asm!("push $0 ; popfq" : : "r"(next.rflags) : "memory" : "intel", "volatile"); +// SAFETY: EMPTY_CR3 must be initialized. 
+pub unsafe fn empty_cr3() -> rmm::PhysicalAddress { + debug_assert!(EMPTY_CR3.poll().is_some()); + *EMPTY_CR3.get_unchecked() +} + +/// Switch to the next context by restoring its stack and registers +pub unsafe fn switch_to(prev: &mut super::Context, next: &mut super::Context) { + // Update contexts' timestamps + let switch_time = crate::time::monotonic(); + prev.cpu_time += switch_time.saturating_sub(prev.switch_time); + next.switch_time = switch_time; - asm!("mov $0, rbx" : "=r"(self.rbx) : : "memory" : "intel", "volatile"); - asm!("mov rbx, $0" : : "r"(next.rbx) : "memory" : "intel", "volatile"); + let pcr = crate::gdt::pcr(); + + if let Some(ref stack) = next.kstack { + crate::gdt::set_tss_stack(pcr, stack.initial_top() as usize); + } + crate::gdt::set_userspace_io_allowed(pcr, next.arch.userspace_io_allowed); - asm!("mov $0, r12" : "=r"(self.r12) : : "memory" : "intel", "volatile"); - asm!("mov r12, $0" : : "r"(next.r12) : "memory" : "intel", "volatile"); + core::arch::asm!( + alternative2!( + feature1: "xsaveopt", + then1: [" + mov eax, 0xffffffff + mov edx, eax + xsaveopt64 [{prev_fx}] + xrstor64 [{next_fx}] + "], + feature2: "xsave", + then2: [" + mov eax, 0xffffffff + mov edx, eax + xsave64 [{prev_fx}] + xrstor64 [{next_fx}] + "], + default: [" + fxsave64 [{prev_fx}] + fxrstor64 [{next_fx}] + "] + ), + prev_fx = in(reg) prev.kfx.as_mut_ptr(), + next_fx = in(reg) next.kfx.as_ptr(), + out("eax") _, + out("edx") _, + ); - asm!("mov $0, r13" : "=r"(self.r13) : : "memory" : "intel", "volatile"); - asm!("mov r13, $0" : : "r"(next.r13) : "memory" : "intel", "volatile"); + { + core::arch::asm!( + alternative!( + feature: "fsgsbase", + then: [" + mov rax, [{next}+{fsbase_off}] + mov rcx, [{next}+{gsbase_off}] - asm!("mov $0, r14" : "=r"(self.r14) : : "memory" : "intel", "volatile"); - asm!("mov r14, $0" : : "r"(next.r14) : "memory" : "intel", "volatile"); + rdfsbase rdx + wrfsbase rax + swapgs + rdgsbase rax + wrgsbase rcx + swapgs - asm!("mov $0, r15" : "=r"(self.r15) : : "memory" : "intel", "volatile"); - asm!("mov r15, $0" : : "r"(next.r15) : "memory" : "intel", "volatile"); + mov [{prev}+{fsbase_off}], rdx + mov [{prev}+{gsbase_off}], rax + "], + // TODO: Most applications will set FSBASE, but won't touch GSBASE. Maybe avoid + // wrmsr or even the swapgs+rdgsbase+wrgsbase+swapgs sequence if they are already + // equal? 
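// With the `fsgsbase` feature the bases are exchanged directly in registers: the
// previous values are read back into {prev}, and the GSBASE swap is bracketed by
// SWAPGS so that it is the user GSBASE (held in IA32_KERNEL_GSBASE while in the
// kernel) that gets replaced. The wrmsr fallback below only loads the next
// context's values; without fsgsbase the stored fields already equal the register
// values (see the struct-level NOTE above), so nothing needs saving for {prev}.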
+ default: [" + mov ecx, {MSR_FSBASE} + mov rdx, [{next}+{fsbase_off}] + mov eax, edx + shr rdx, 32 + wrmsr - asm!("mov $0, rsp" : "=r"(self.rsp) : : "memory" : "intel", "volatile"); - asm!("mov rsp, $0" : : "r"(next.rsp) : "memory" : "intel", "volatile"); + mov ecx, {MSR_KERNEL_GSBASE} + mov rdx, [{next}+{gsbase_off}] + mov eax, edx + shr rdx, 32 + wrmsr - asm!("mov $0, rbp" : "=r"(self.rbp) : : "memory" : "intel", "volatile"); - asm!("mov rbp, $0" : : "r"(next.rbp) : "memory" : "intel", "volatile"); + // {prev} + "] + ), + out("rax") _, + out("rdx") _, + out("ecx") _, prev = in(reg) addr_of_mut!(prev.arch), next = in(reg) addr_of!(next.arch), + MSR_FSBASE = const msr::IA32_FS_BASE, + MSR_KERNEL_GSBASE = const msr::IA32_KERNEL_GSBASE, + gsbase_off = const offset_of!(Context, gsbase), + fsbase_off = const offset_of!(Context, fsbase), + ); } -} -#[allow(dead_code)] -#[repr(packed)] -pub struct SignalHandlerStack { - r11: usize, - r10: usize, - r9: usize, - r8: usize, - rsi: usize, - rdi: usize, - rdx: usize, - rcx: usize, - rax: usize, - handler: extern fn(usize), - sig: usize, - rip: usize, + (*pcr).percpu.new_addrsp_tmp.set(next.addr_space.clone()); + + switch_to_inner(&mut prev.arch, &mut next.arch) } +// Check disassembly! #[naked] -unsafe extern fn signal_handler_wrapper() { - #[inline(never)] - unsafe fn inner(stack: &SignalHandlerStack) { - (stack.handler)(stack.sig); +unsafe extern "sysv64" fn switch_to_inner(_prev: &mut Context, _next: &mut Context) { + use Context as Cx; + + core::arch::naked_asm!( + // As a quick reminder for those who are unfamiliar with the System V ABI (extern "C"): + // + // - the current parameters are passed in the registers `rdi`, `rsi`, + // - we can modify scratch registers, e.g. rax + // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we + // store them here in the first place. + concat!(" + // Save old registers, and load new ones + mov [rdi + {off_rbx}], rbx + mov rbx, [rsi + {off_rbx}] + + mov [rdi + {off_r12}], r12 + mov r12, [rsi + {off_r12}] + + mov [rdi + {off_r13}], r13 + mov r13, [rsi + {off_r13}] + + mov [rdi + {off_r14}], r14 + mov r14, [rsi + {off_r14}] + + mov [rdi + {off_r15}], r15 + mov r15, [rsi + {off_r15}] + + mov [rdi + {off_rbp}], rbp + mov rbp, [rsi + {off_rbp}] + + mov [rdi + {off_rsp}], rsp + mov rsp, [rsi + {off_rsp}] + + // push RFLAGS (can only be modified via stack) + pushfq + // pop RFLAGS into `self.rflags` + pop QWORD PTR [rdi + {off_rflags}] + + // push `next.rflags` + push QWORD PTR [rsi + {off_rflags}] + // pop into RFLAGS + popfq + + // When we return, we cannot even guarantee that the return address on the stack, points to + // the calling function, `context::switch`. Thus, we have to execute this Rust hook by + // ourselves, which will unlock the contexts before the later switch. + + // Note that switch_finish_hook will be responsible for executing `ret`. + jmp {switch_hook} + + "), + + off_rflags = const(offset_of!(Cx, rflags)), + + off_rbx = const(offset_of!(Cx, rbx)), + off_r12 = const(offset_of!(Cx, r12)), + off_r13 = const(offset_of!(Cx, r13)), + off_r14 = const(offset_of!(Cx, r14)), + off_r15 = const(offset_of!(Cx, r15)), + off_rbp = const(offset_of!(Cx, rbp)), + off_rsp = const(offset_of!(Cx, rsp)), + + switch_hook = sym crate::context::switch_finish_hook, + ); +} + +/// Allocates a new identically mapped ktable and empty utable (same memory on x86_64). +pub fn setup_new_utable() -> Result
{ + use crate::memory::{KernelMapper, TheFrameAllocator}; + + let utable = unsafe { + PageMapper::create(TableKind::User, TheFrameAllocator).ok_or(Error::new(ENOMEM))? + }; + + { + let active_ktable = KernelMapper::lock(); + + let copy_mapping = |p4_no| unsafe { + let entry = active_ktable + .table() + .entry(p4_no) + .unwrap_or_else(|| panic!("expected kernel PML {} to be mapped", p4_no)); + + utable.table().set_entry(p4_no, entry) + }; + // TODO: Just copy all 256 mappings? Or copy KERNEL_PML4+KERNEL_PERCPU_PML4 (needed for + // paranoid ISRs which can occur anywhere; we don't want interrupts to triple fault!) and + // map lazily via page faults in the kernel. + + // Copy kernel image mapping + copy_mapping(crate::KERNEL_PML4); + + // Copy kernel heap mapping + copy_mapping(crate::KERNEL_HEAP_PML4); + + // Copy physmap mapping + copy_mapping(crate::PHYS_PML4); } - // Push scratch registers - asm!("push rax - push rcx - push rdx - push rdi - push rsi - push r8 - push r9 - push r10 - push r11" - : : : : "intel", "volatile"); - - // Get reference to stack variables - let rsp: usize; - asm!("" : "={rsp}"(rsp) : : : "intel", "volatile"); - - // Call inner rust function - inner(&*(rsp as *const SignalHandlerStack)); - - // Pop scratch registers, error code, and return - asm!("pop r11 - pop r10 - pop r9 - pop r8 - pop rsi - pop rdi - pop rdx - pop rcx - pop rax - add rsp, 16" - : : : : "intel", "volatile"); + Ok(Table { utable }) } diff --git a/src/context/context.rs b/src/context/context.rs index 0a0f1869..ee57e029 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -1,309 +1,217 @@ -use alloc::sync::Arc; -use alloc::boxed::Box; -use alloc::vec::Vec; -use alloc::collections::VecDeque; -use core::alloc::{GlobalAlloc, Layout}; -use core::cmp::Ordering; -use core::mem; -use spin::Mutex; - -use crate::arch::{macros::InterruptStack, paging::PAGE_SIZE}; -use crate::common::unique::Unique; -use crate::context::arch; -use crate::context::file::FileDescriptor; -use crate::context::memory::{Grant, Memory, SharedMemory, Tls}; -use crate::ipi::{ipi, IpiKind, IpiTarget}; -use crate::scheme::{SchemeNamespace, FileHandle}; -use crate::sync::WaitMap; -use crate::syscall::data::SigAction; -use crate::syscall::flag::SIG_DFL; - -/// Unique identifier for a context (i.e. `pid`). 
-use ::core::sync::atomic::AtomicUsize; -int_like!(ContextId, AtomicContextId, usize, AtomicUsize); +use alloc::{borrow::Cow, sync::Arc, vec::Vec}; +use arrayvec::ArrayString; +use core::{ + mem::{self, size_of}, + num::NonZeroUsize, + sync::atomic::{AtomicU32, Ordering}, +}; +use spin::RwLock; +use syscall::{SigProcControl, Sigcontrol}; + +#[cfg(feature = "sys_stat")] +use crate::cpu_stats; +use crate::{ + arch::{interrupt::InterruptStack, paging::PAGE_SIZE}, + common::aligned_box::AlignedBox, + context::{self, arch, file::FileDescriptor}, + cpu_set::{LogicalCpuId, LogicalCpuSet}, + ipi::{ipi, IpiKind, IpiTarget}, + memory::{allocate_p2frame, deallocate_p2frame, Enomem, Frame, RaiiFrame}, + paging::{RmmA, RmmArch}, + percpu::PercpuBlock, + scheme::{CallerCtx, FileHandle, SchemeNamespace}, +}; + +use crate::syscall::error::{Error, Result, EAGAIN, ESRCH}; + +use super::{ + empty_cr3, + file::FileDescription, + memory::{AddrSpaceWrapper, GrantFileRef}, +}; /// The status of a context - used for scheduling -/// See `syscall::process::waitpid` and the `sync` module for examples of usage -#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug)] pub enum Status { Runnable, + + // TODO: Rename to SoftBlocked and move status_reason to this variant. + /// Not currently runnable, typically due to some blocking syscall, but it can be trivially + /// unblocked by e.g. signals. Blocked, - Stopped(usize), - Exited(usize) -} -#[derive(Copy, Clone, Debug)] -pub struct WaitpidKey { - pub pid: Option, - pub pgid: Option, + /// Not currently runnable, and cannot be runnable until manually unblocked, depending on what + /// reason. + HardBlocked { + reason: HardBlockedReason, + }, + Dead { + excp: Option, + }, } -impl Ord for WaitpidKey { - fn cmp(&self, other: &WaitpidKey) -> Ordering { - // If both have pid set, compare that - if let Some(s_pid) = self.pid { - if let Some(o_pid) = other.pid { - return s_pid.cmp(&o_pid); - } - } - - // If both have pgid set, compare that - if let Some(s_pgid) = self.pgid { - if let Some(o_pgid) = other.pgid { - return s_pgid.cmp(&o_pgid); - } - } - - // If either has pid set, it is greater - if self.pid.is_some() { - return Ordering::Greater; - } - - if other.pid.is_some() { - return Ordering::Less; - } - - // If either has pgid set, it is greater - if self.pgid.is_some() { - return Ordering::Greater; - } - - if other.pgid.is_some() { - return Ordering::Less; - } - - // If all pid and pgid are None, they are equal - Ordering::Equal +impl Status { + pub fn is_runnable(&self) -> bool { + matches!(self, Self::Runnable) } -} - -impl PartialOrd for WaitpidKey { - fn partial_cmp(&self, other: &WaitpidKey) -> Option { - Some(self.cmp(other)) + pub fn is_soft_blocked(&self) -> bool { + matches!(self, Self::Blocked) } } -impl PartialEq for WaitpidKey { - fn eq(&self, other: &WaitpidKey) -> bool { - self.cmp(other) == Ordering::Equal - } +#[derive(Clone, Debug)] +pub enum HardBlockedReason { + /// "SIGSTOP", only procmgr is allowed to switch contexts this state + Stopped, + AwaitingMmap { + file_ref: GrantFileRef, + }, + // TODO: PageFaultOom? 
+ NotYetStarted, + PtraceStop, } -impl Eq for WaitpidKey {} +const CONTEXT_NAME_CAPAC: usize = 32; -/// A context, which identifies either a process or a thread +/// A context, which is typically mapped to a userspace thread #[derive(Debug)] pub struct Context { - /// The ID of this context - pub id: ContextId, - /// The group ID of this context - pub pgid: ContextId, - /// The ID of the parent context - pub ppid: ContextId, - /// The real user id - pub ruid: u32, - /// The real group id - pub rgid: u32, - /// The real namespace id - pub rns: SchemeNamespace, - /// The effective user id - pub euid: u32, - /// The effective group id - pub egid: u32, - /// The effective namespace id - pub ens: SchemeNamespace, - /// Signal mask - pub sigmask: [u64; 2], - /// Process umask - pub umask: usize, + pub debug_id: u32, + /// Signal handler + pub sig: Option, /// Status of context pub status: Status, + pub status_reason: &'static str, /// Context running or not pub running: bool, - /// CPU ID, if locked - pub cpu_id: Option, - /// Current system call - pub syscall: Option<(usize, usize, usize, usize, usize, usize)>, + /// Current CPU ID + pub cpu_id: Option, + /// Time this context was switched to + pub switch_time: u128, + /// Amount of CPU time used + pub cpu_time: u128, + /// Scheduler CPU affinity. If set, [`cpu_id`] can except [`None`] never be anything else than + /// this value. + pub sched_affinity: LogicalCpuSet, + /// Keeps track of whether this context is currently handling a syscall. Only up-to-date when + /// not running. + pub inside_syscall: bool, + + #[cfg(feature = "syscall_debug")] + pub syscall_debug_info: crate::syscall::debug::SyscallDebugInfo, + /// Head buffer to use when system call buffers are not page aligned - pub syscall_head: Box<[u8]>, + // TODO: Store in user memory? + pub syscall_head: Option, /// Tail buffer to use when system call buffers are not page aligned - pub syscall_tail: Box<[u8]>, - /// Context is halting parent - pub vfork: bool, - /// Context is being waited on - pub waitpid: Arc>, - /// Context should handle pending signals - pub pending: VecDeque, + // TODO: Store in user memory? + pub syscall_tail: Option, /// Context should wake up at specified time - pub wake: Option<(u64, u64)>, + pub wake: Option, /// The architecture specific context pub arch: arch::Context, /// Kernel FX - used to store SIMD and FPU registers on context switch - pub kfx: Option>, - /// Kernel stack - pub kstack: Option>, - /// Kernel signal backup: Registers, Kernel FX, Kernel Stack, Signal number - pub ksig: Option<(arch::Context, Option>, Option>, u8)>, - /// Restore ksig context on next switch - pub ksig_restore: bool, - /// Executable image - pub image: Vec, - /// User heap - pub heap: Option, - /// User stack - pub stack: Option, - /// User signal stack - pub sigstack: Option, - /// User Thread local storage - pub tls: Option, - /// User grants - pub grants: Arc>>, + pub kfx: AlignedBox<[u8], { arch::KFX_ALIGN }>, + /// Kernel stack, if located on the heap. + pub kstack: Option, + /// Address space containing a page table lock, and grants. Normally this will have a value, + /// but can be None while the context is being reaped or when a new context is created but has + /// not yet had its address space changed. Note that these are only for user mappings; kernel + /// mappings are universal and independent on address spaces or contexts. 
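    /// The wrapper is reference-counted, so several contexts (for example threads of the
    /// same process) can share one address space; `set_addr_space` below re-points the
    /// active page table when the currently running context swaps its own space out.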
+ pub addr_space: Option>, /// The name of the context - pub name: Arc>>, - /// The current working directory - pub cwd: Arc>>, + pub name: ArrayString, /// The open files in the scheme - pub files: Arc>>>, - /// Signal actions - pub actions: Arc>>, - /// The pointer to the user-space registers, saved after certain - /// interrupts. This pointer is somewhere inside kstack, and the - /// kstack address at the time of creation is the first element in - /// this tuple. - pub regs: Option<(usize, Unique)>, - /// A somewhat hacky way to initially stop a context when creating - /// a new instance of the proc: scheme, entirely separate from - /// signals or any other way to restart a process. - pub ptrace_stop: bool + pub files: Arc>>>, + /// All contexts except kmain will primarily live in userspace, and enter the kernel only when + /// interrupts or syscalls occur. This flag is set for all contexts but kmain. + pub userspace: bool, + pub being_sigkilled: bool, + pub fmap_ret: Option, + + // TODO: id can reappear after wraparound? + pub owner_proc_id: Option, + + // TODO: Temporary replacement for existing kernel logic, replace with capabilities! + pub ens: SchemeNamespace, + pub euid: u32, + pub egid: u32, + pub pid: usize, +} + +#[derive(Debug)] +pub struct SignalState { + /// Offset to jump to when a signal is received. + pub user_handler: NonZeroUsize, + /// Offset to jump to when a program fault occurs. If None, the context is sigkilled. + pub excp_handler: Option, + + /// Signal control pages, shared memory + pub thread_control: RaiiFrame, + pub proc_control: RaiiFrame, + /// Offset within the control pages of respective word-aligned structs. + pub threadctl_off: u16, + pub procctl_off: u16, } impl Context { - pub fn new(id: ContextId) -> Context { - let syscall_head = unsafe { Box::from_raw(crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(PAGE_SIZE, PAGE_SIZE)) as *mut [u8; PAGE_SIZE]) }; - let syscall_tail = unsafe { Box::from_raw(crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(PAGE_SIZE, PAGE_SIZE)) as *mut [u8; PAGE_SIZE]) }; - - Context { - id: id, - pgid: id, - ppid: ContextId::from(0), - ruid: 0, - rgid: 0, - rns: SchemeNamespace::from(0), - euid: 0, - egid: 0, - ens: SchemeNamespace::from(0), - sigmask: [0; 2], - umask: 0o022, - status: Status::Blocked, + pub fn new(owner_proc_id: Option) -> Result { + static DEBUG_ID: AtomicU32 = AtomicU32::new(1); + let this = Self { + debug_id: DEBUG_ID.fetch_add(1, Ordering::Relaxed), + sig: None, + status: Status::HardBlocked { + reason: HardBlockedReason::NotYetStarted, + }, + status_reason: "", running: false, cpu_id: None, - syscall: None, - syscall_head: syscall_head, - syscall_tail: syscall_tail, - vfork: false, - waitpid: Arc::new(WaitMap::new()), - pending: VecDeque::new(), + switch_time: 0, + cpu_time: 0, + sched_affinity: LogicalCpuSet::all(), + inside_syscall: false, + syscall_head: Some(RaiiFrame::allocate()?), + syscall_tail: Some(RaiiFrame::allocate()?), wake: None, arch: arch::Context::new(), - kfx: None, + kfx: AlignedBox::<[u8], { arch::KFX_ALIGN }>::try_zeroed_slice(crate::arch::kfx_size())?, kstack: None, - ksig: None, - ksig_restore: false, - image: Vec::new(), - heap: None, - stack: None, - sigstack: None, - tls: None, - grants: Arc::new(Mutex::new(Vec::new())), - name: Arc::new(Mutex::new(Vec::new().into_boxed_slice())), - cwd: Arc::new(Mutex::new(Vec::new())), - files: Arc::new(Mutex::new(Vec::new())), - actions: Arc::new(Mutex::new(vec![( - SigAction { - sa_handler: unsafe { mem::transmute(SIG_DFL) }, - 
sa_mask: [0; 2], - sa_flags: 0, - }, - 0 - ); 128])), - regs: None, - ptrace_stop: false - } - } - - /// Make a relative path absolute - /// Given a cwd of "scheme:/path" - /// This function will turn "foo" into "scheme:/path/foo" - /// "/foo" will turn into "scheme:/foo" - /// "bar:/foo" will be used directly, as it is already absolute - pub fn canonicalize(&self, path: &[u8]) -> Vec { - let mut canon = if path.iter().position(|&b| b == b':').is_none() { - let cwd = self.cwd.lock(); + addr_space: None, + name: ArrayString::new(), + files: Arc::new(RwLock::new(Vec::new())), + userspace: false, + fmap_ret: None, + being_sigkilled: false, + owner_proc_id, - let mut canon = if !path.starts_with(b"/") { - let mut c = cwd.clone(); - if ! c.ends_with(b"/") { - c.push(b'/'); - } - c - } else { - cwd[..cwd.iter().position(|&b| b == b':').map_or(1, |i| i + 1)].to_vec() - }; + ens: 0.into(), + euid: 0, + egid: 0, + pid: 0, - canon.extend_from_slice(&path); - canon - } else { - path.to_vec() + #[cfg(feature = "syscall_debug")] + syscall_debug_info: crate::syscall::debug::SyscallDebugInfo::default(), }; + #[cfg(feature = "sys_stat")] + cpu_stats::add_context(); + Ok(this) + } - // NOTE: assumes the scheme does not include anything like "../" or "./" - let mut result = { - let parts = canon.split(|&c| c == b'/') - .filter(|&part| part != b".") - .rev() - .scan(0, |nskip, part| { - if part == b"." { - Some(None) - } else if part == b".." { - *nskip += 1; - Some(None) - } else if *nskip > 0 { - *nskip -= 1; - Some(None) - } else { - Some(Some(part)) - } - }) - .filter_map(|x| x) - .filter(|x| !x.is_empty()) - .collect::>(); - parts - .iter() - .rev() - .fold(Vec::new(), |mut vec, &part| { - vec.extend_from_slice(part); - vec.push(b'/'); - vec - }) - }; - result.pop(); // remove extra '/' - - // replace with the root of the scheme if it's empty - if result.is_empty() { - let pos = canon.iter() - .position(|&b| b == b':') - .map_or(canon.len(), |p| p + 1); - canon.truncate(pos); - canon + /// Block the context, and return true if it was runnable before being blocked + pub fn block(&mut self, reason: &'static str) -> bool { + if self.status.is_runnable() { + self.status = Status::Blocked; + self.status_reason = reason; + true } else { - result + false } } - /// Block the context, and return true if it was runnable before being blocked - pub fn block(&mut self) -> bool { - if self.status == Status::Runnable { - self.status = Status::Blocked; + pub fn hard_block(&mut self, reason: HardBlockedReason) -> bool { + if self.status.is_runnable() { + self.status = Status::HardBlocked { reason }; + true } else { false @@ -312,14 +220,13 @@ impl Context { /// Unblock context, and return true if it was blocked before being marked runnable pub fn unblock(&mut self) -> bool { - if self.status == Status::Blocked { - self.status = Status::Runnable; - + if self.unblock_no_ipi() { + // TODO: Only send IPI if currently running? if let Some(cpu_id) = self.cpu_id { - if cpu_id != crate::cpu_id() { + if cpu_id != crate::cpu_id() { // Send IPI if not on current CPU ipi(IpiKind::Wakeup, IpiTarget::Other); - } + } } true @@ -328,6 +235,18 @@ impl Context { } } + /// Unblock context without IPI, and return true if it was blocked before being marked runnable + pub fn unblock_no_ipi(&mut self) -> bool { + if self.status.is_soft_blocked() { + self.status = Status::Runnable; + self.status_reason = ""; + + true + } else { + false + } + } + /// Add a file to the lowest available slot. 
/// Return the file descriptor number or None if no slot was found pub fn add_file(&self, file: FileDescriptor) -> Option { @@ -337,7 +256,7 @@ impl Context { /// Add a file to the lowest available slot greater than or equal to min. /// Return the file descriptor number or None if no slot was found pub fn add_file_min(&self, file: FileDescriptor, min: usize) -> Option { - let mut files = self.files.lock(); + let mut files = self.files.write(); for (i, file_option) in files.iter_mut().enumerate() { if file_option.is_none() && i >= min { *file_option = Some(file); @@ -360,9 +279,9 @@ impl Context { /// Get a file pub fn get_file(&self, i: FileHandle) -> Option { - let files = self.files.lock(); - if i.into() < files.len() { - files[i.into()].clone() + let files = self.files.read(); + if i.get() < files.len() { + files[i.get()].clone() } else { None } @@ -371,17 +290,16 @@ impl Context { /// Insert a file with a specific handle number. This is used by dup2 /// Return the file descriptor number or None if the slot was not empty, or i was invalid pub fn insert_file(&self, i: FileHandle, file: FileDescriptor) -> Option { - let mut files = self.files.lock(); - if i.into() < super::CONTEXT_MAX_FILES { - while i.into() >= files.len() { - files.push(None); - } - if files[i.into()].is_none() { - files[i.into()] = Some(file); - Some(i) - } else { - None - } + let mut files = self.files.write(); + if i.get() >= super::CONTEXT_MAX_FILES { + return None; + } + if i.get() >= files.len() { + files.resize_with(i.get() + 1, || None); + } + if files[i.get()].is_none() { + files[i.get()] = Some(file); + Some(i) } else { None } @@ -390,11 +308,237 @@ impl Context { /// Remove a file // TODO: adjust files vector to smaller size if possible pub fn remove_file(&self, i: FileHandle) -> Option { - let mut files = self.files.lock(); - if i.into() < files.len() { - files[i.into()].take() + let mut files = self.files.write(); + if i.get() < files.len() { + files[i.get()].take() } else { None } } + + pub fn is_current_context(&self) -> bool { + self.running && self.cpu_id == Some(crate::cpu_id()) + } + + pub fn addr_space(&self) -> Result<&Arc> { + self.addr_space.as_ref().ok_or(Error::new(ESRCH)) + } + pub fn set_addr_space( + &mut self, + addr_space: Option>, + ) -> Option> { + if let (Some(ref old), Some(ref new)) = (&self.addr_space, &addr_space) + && Arc::ptr_eq(old, new) + { + return addr_space; + }; + + if self.is_current_context() { + // TODO: Share more code with context::arch::switch_to. 
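// For the currently running context the swap must take effect immediately:
// drop this CPU from the old space's `used_by` set, record the new space in the
// per-CPU block, mark it used by this CPU and make its table current, or, when
// the context ends up with no address space at all, fall back to `empty_cr3()`.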
+ let this_percpu = PercpuBlock::current(); + + if let Some(ref prev_addrsp) = self.addr_space { + assert!(Arc::ptr_eq( + &this_percpu.current_addrsp.borrow().as_ref().unwrap(), + prev_addrsp + )); + prev_addrsp + .acquire_read() + .used_by + .atomic_clear(this_percpu.cpu_id); + } + + let _old_addrsp = core::mem::replace( + &mut *this_percpu.current_addrsp.borrow_mut(), + addr_space.clone(), + ); + + if let Some(ref new) = addr_space { + let new_addrsp = new.acquire_read(); + new_addrsp.used_by.atomic_set(this_percpu.cpu_id); + + unsafe { + new_addrsp.table.utable.make_current(); + } + } else { + unsafe { + crate::paging::RmmA::set_table(rmm::TableKind::User, empty_cr3()); + } + } + } else { + assert!(!self.running); + } + + core::mem::replace(&mut self.addr_space, addr_space) + } + + fn can_access_regs(&self) -> bool { + self.userspace + } + + pub fn regs(&self) -> Option<&InterruptStack> { + if !self.can_access_regs() { + return None; + } + let Some(ref kstack) = self.kstack else { + return None; + }; + Some(unsafe { &*kstack.initial_top().sub(size_of::()).cast() }) + } + pub fn regs_mut(&mut self) -> Option<&mut InterruptStack> { + if !self.can_access_regs() { + return None; + } + let Some(ref mut kstack) = self.kstack else { + return None; + }; + Some(unsafe { &mut *kstack.initial_top().sub(size_of::()).cast() }) + } + pub fn sigcontrol(&mut self) -> Option<(&Sigcontrol, &SigProcControl, &mut SignalState)> { + Some(Self::sigcontrol_raw(self.sig.as_mut()?)) + } + pub fn sigcontrol_raw( + sig: &mut SignalState, + ) -> (&Sigcontrol, &SigProcControl, &mut SignalState) { + let check = |off| { + assert_eq!(usize::from(off) % mem::align_of::(), 0); + assert!(usize::from(off).saturating_add(mem::size_of::()) < PAGE_SIZE); + }; + check(sig.procctl_off); + check(sig.threadctl_off); + + let for_thread = unsafe { + &*(RmmA::phys_to_virt(sig.thread_control.get().base()).data() as *const Sigcontrol) + .byte_add(usize::from(sig.threadctl_off)) + }; + let for_proc = unsafe { + &*(RmmA::phys_to_virt(sig.proc_control.get().base()).data() as *const SigProcControl) + .byte_add(usize::from(sig.procctl_off)) + }; + + (for_thread, for_proc, sig) + } + pub fn caller_ctx(&self) -> CallerCtx { + CallerCtx { + uid: self.euid, + gid: self.egid, + pid: self.pid, + } + } +} + +/// Wrapper struct for borrowing the syscall head or tail buf. 
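// Illustrative aside, not part of the patch: the bounds check that
// sigcontrol_raw() performs on the stored offsets, shown in isolation. An
// offset into a control page is only usable for a value of type T if it is
// aligned for T and the whole T stays inside the page (mirroring the `check`
// closure above). Hypothetical helper name:
fn offset_fits_in_page<T>(off: usize, page_size: usize) -> bool {
    off % core::mem::align_of::<T>() == 0
        && off.saturating_add(core::mem::size_of::<T>()) < page_size
}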
+#[derive(Debug)] +pub struct BorrowedHtBuf { + inner: Option, + head_and_not_tail: bool, +} +impl BorrowedHtBuf { + pub fn head() -> Result { + Ok(Self { + inner: Some( + context::current() + .write() + .syscall_head + .take() + .ok_or(Error::new(EAGAIN))?, + ), + head_and_not_tail: true, + }) + } + pub fn tail() -> Result { + Ok(Self { + inner: Some( + context::current() + .write() + .syscall_tail + .take() + .ok_or(Error::new(EAGAIN))?, + ), + head_and_not_tail: false, + }) + } + pub fn buf(&self) -> &[u8; PAGE_SIZE] { + unsafe { + &*(RmmA::phys_to_virt(self.inner.as_ref().expect("must succeed").get().base()).data() + as *const [u8; PAGE_SIZE]) + } + } + pub fn buf_mut(&mut self) -> &mut [u8; PAGE_SIZE] { + unsafe { + &mut *(RmmA::phys_to_virt(self.inner.as_mut().expect("must succeed").get().base()) + .data() as *mut [u8; PAGE_SIZE]) + } + } + pub fn frame(&self) -> Frame { + self.inner.as_ref().expect("must succeed").get() + } + /* + pub fn use_for_slice(&mut self, raw: UserSlice) -> Result> { + if raw.len() > self.buf().len() { + return Ok(None); + } + raw.copy_to_slice(&mut self.buf_mut()[..raw.len()])?; + Ok(Some(&self.buf()[..raw.len()])) + } + pub fn use_for_string(&mut self, raw: UserSlice) -> Result<&str> { + let slice = self.use_for_slice(raw)?.ok_or(Error::new(ENAMETOOLONG))?; + core::str::from_utf8(slice).map_err(|_| Error::new(EINVAL)) + } + pub unsafe fn use_for_struct(&mut self) -> Result<&mut T> { + if mem::size_of::() > PAGE_SIZE || mem::align_of::() > PAGE_SIZE { + return Err(Error::new(EINVAL)); + } + self.buf_mut().fill(0_u8); + Ok(unsafe { &mut *self.buf_mut().as_mut_ptr().cast() }) + } + */ +} +impl Drop for BorrowedHtBuf { + fn drop(&mut self) { + let context = context::current(); + + let Some(inner) = self.inner.take() else { + return; + }; + match context.write() { + mut context => { + (if self.head_and_not_tail { + &mut context.syscall_head + } else { + &mut context.syscall_tail + }) + .get_or_insert(inner); + } + } + } +} + +pub struct Kstack { + /// naturally aligned, order 4 + base: Frame, +} +impl Kstack { + pub fn new() -> Result { + Ok(Self { + base: allocate_p2frame(4).ok_or(Enomem)?, + }) + } + pub fn initial_top(&self) -> *mut u8 { + unsafe { (RmmA::phys_to_virt(self.base.base()).data() as *mut u8).add(PAGE_SIZE << 4) } + } + pub fn len(&self) -> usize { + PAGE_SIZE << 4 + } +} + +impl Drop for Kstack { + fn drop(&mut self) { + unsafe { deallocate_p2frame(self.base, 4) } + } +} +impl core::fmt::Debug for Kstack { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "[kstack at {:?}]", self.base) + } } diff --git a/src/context/file.rs b/src/context/file.rs index 51b25476..8f1d34c1 100644 --- a/src/context/file.rs +++ b/src/context/file.rs @@ -1,20 +1,60 @@ //! File structs +use crate::{ + event, + scheme::{self, SchemeId}, + syscall::error::{Error, Result, EBADF}, +}; use alloc::sync::Arc; -use crate::event; use spin::RwLock; -use crate::scheme::{self, SchemeId}; -use crate::syscall::error::{Result, Error, EBADF}; +use syscall::{schemev2::NewFdFlags, RwFlags, O_APPEND, O_NONBLOCK}; /// A file description -#[derive(Debug)] +#[derive(Clone, Copy, Debug)] pub struct FileDescription { + /// The current file offset (seek) + pub offset: u64, /// The scheme that this file refers to pub scheme: SchemeId, /// The number the scheme uses to refer to this file pub number: usize, /// The flags passed to open or fcntl(SETFL) - pub flags: usize, + pub flags: u32, + pub internal_flags: InternalFlags, +} +bitflags! 
{ + #[derive(Clone, Copy, Debug)] + pub struct InternalFlags: u32 { + const POSITIONED = 1; + } +} +impl FileDescription { + pub fn rw_flags(&self, rw: RwFlags) -> u32 { + let mut ret = self.flags & !(O_NONBLOCK | O_APPEND) as u32; + if rw.contains(RwFlags::APPEND) { + ret |= O_APPEND as u32; + } + if rw.contains(RwFlags::NONBLOCK) { + ret |= O_NONBLOCK as u32; + } + ret + } +} +impl InternalFlags { + pub fn from_extra0(fl: u8) -> Option { + Some( + NewFdFlags::from_bits(fl)? + .iter() + .map(|fd| { + if fd == NewFdFlags::POSITIONED { + Self::POSITIONED + } else { + Self::empty() + } + }) + .collect(), + ) + } } /// A file descriptor @@ -27,21 +67,26 @@ pub struct FileDescriptor { pub cloexec: bool, } -impl FileDescriptor { - pub fn close(self) -> Result { - if let Ok(file) = Arc::try_unwrap(self.description) { - let file = file.into_inner(); +impl FileDescription { + /// Try closing a file, although at this point the description will be destroyed anyway, if + /// doing so fails. + pub fn try_close(self) -> Result<()> { + event::unregister_file(self.scheme, self.number); + + let scheme = scheme::schemes() + .get(self.scheme) + .ok_or(Error::new(EBADF))? + .clone(); - event::unregister_file(file.scheme, file.number); + scheme.on_close(self.number) + } +} - let scheme = { - let schemes = scheme::schemes(); - let scheme = schemes.get(file.scheme).ok_or(Error::new(EBADF))?; - scheme.clone() - }; - scheme.close(file.number) - } else { - Ok(0) +impl FileDescriptor { + pub fn close(self) -> Result<()> { + if let Ok(file) = Arc::try_unwrap(self.description).map(RwLock::into_inner) { + file.try_close()?; } + Ok(()) } } diff --git a/src/context/list.rs b/src/context/list.rs deleted file mode 100644 index ef92c9ce..00000000 --- a/src/context/list.rs +++ /dev/null @@ -1,101 +0,0 @@ -use alloc::sync::Arc; -use alloc::boxed::Box; -use alloc::collections::BTreeMap; -use core::alloc::{GlobalAlloc, Layout}; -use core::{iter, mem}; -use core::sync::atomic::Ordering; -use crate::paging; -use spin::RwLock; - -use crate::syscall::error::{Result, Error, EAGAIN}; -use super::context::{Context, ContextId}; - -/// Context list type -pub struct ContextList { - map: BTreeMap>>, - next_id: usize -} - -impl ContextList { - /// Create a new context list. - pub fn new() -> Self { - ContextList { - map: BTreeMap::new(), - next_id: 1 - } - } - - /// Get the nth context. - pub fn get(&self, id: ContextId) -> Option<&Arc>> { - self.map.get(&id) - } - - /// Get an iterator of all parents - pub fn anchestors(&'_ self, id: ContextId) -> impl Iterator>)> + '_ { - iter::successors(self.get(id).map(|context| (id, context)), move |(_id, context)| { - let context = context.read(); - let id = context.ppid; - self.get(id).map(|context| (id, context)) - }) - } - - /// Get the current context. - pub fn current(&self) -> Option<&Arc>> { - self.map.get(&super::CONTEXT_ID.load(Ordering::SeqCst)) - } - - pub fn iter(&self) -> ::alloc::collections::btree_map::Iter>> { - self.map.iter() - } - - /// Create a new context. - pub fn new_context(&mut self) -> Result<&Arc>> { - if self.next_id >= super::CONTEXT_MAX_CONTEXTS { - self.next_id = 1; - } - - while self.map.contains_key(&ContextId::from(self.next_id)) { - self.next_id += 1; - } - - if self.next_id >= super::CONTEXT_MAX_CONTEXTS { - return Err(Error::new(EAGAIN)); - } - - let id = ContextId::from(self.next_id); - self.next_id += 1; - - assert!(self.map.insert(id, Arc::new(RwLock::new(Context::new(id)))).is_none()); - - Ok(self.map.get(&id).expect("Failed to insert new context. 
ID is out of bounds.")) - } - - /// Spawn a context from a function. - pub fn spawn(&mut self, func: extern fn()) -> Result<&Arc>> { - let context_lock = self.new_context()?; - { - let mut context = context_lock.write(); - let mut fx = unsafe { Box::from_raw(crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(512, 16)) as *mut [u8; 512]) }; - for b in fx.iter_mut() { - *b = 0; - } - let mut stack = vec![0; 65_536].into_boxed_slice(); - let offset = stack.len() - mem::size_of::(); - unsafe { - let offset = stack.len() - mem::size_of::(); - let func_ptr = stack.as_mut_ptr().offset(offset as isize); - *(func_ptr as *mut usize) = func as usize; - } - context.arch.set_page_table(unsafe { paging::ActivePageTable::new().address() }); - context.arch.set_fx(fx.as_ptr() as usize); - context.arch.set_stack(stack.as_ptr() as usize + offset); - context.kfx = Some(fx); - context.kstack = Some(stack); - } - Ok(context_lock) - } - - pub fn remove(&mut self, id: ContextId) -> Option>> { - self.map.remove(&id) - } -} diff --git a/src/context/memory.rs b/src/context/memory.rs index 22426c92..890eb5b2 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -1,484 +1,2889 @@ -use alloc::sync::{Arc, Weak}; -use alloc::collections::VecDeque; -use core::intrinsics; -use spin::Mutex; - -use crate::arch::paging::PAGE_SIZE; -use crate::context::file::FileDescriptor; -use crate::ipi::{ipi, IpiKind, IpiTarget}; -use crate::memory::Frame; -use crate::paging::{ActivePageTable, InactivePageTable, Page, PageIter, PhysicalAddress, VirtualAddress}; -use crate::paging::entry::EntryFlags; -use crate::paging::mapper::MapperFlushAll; -use crate::paging::temporary_page::TemporaryPage; +use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; +use arrayvec::ArrayVec; +use core::{ + cmp, + fmt::Debug, + num::NonZeroUsize, + sync::atomic::{AtomicU32, Ordering}, +}; +use rmm::{Arch as _, PageFlush}; +use spin::{RwLock, RwLockReadGuard, RwLockUpgradableGuard, RwLockWriteGuard}; +use syscall::{error::*, flag::MapFlags, GrantFlags, MunmapFlags}; + +use crate::{ + arch::paging::PAGE_SIZE, + context::arch::setup_new_utable, + cpu_set::LogicalCpuSet, + memory::{ + deallocate_frame, deallocate_p2frame, get_page_info, init_frame, the_zeroed_frame, + AddRefError, Enomem, Frame, PageInfo, RaiiFrame, RefCount, RefKind, + }, + paging::{Page, PageFlags, PageMapper, RmmA, TableKind, VirtualAddress}, + percpu::PercpuBlock, + scheme::{self, KernelSchemes}, +}; + +use super::{context::HardBlockedReason, file::FileDescription}; + +pub const MMAP_MIN_DEFAULT: usize = PAGE_SIZE; + +pub fn page_flags(flags: MapFlags) -> PageFlags { + PageFlags::new() + .user(true) + .execute(flags.contains(MapFlags::PROT_EXEC)) + .write(flags.contains(MapFlags::PROT_WRITE)) + //TODO: PROT_READ +} +pub fn map_flags(page_flags: PageFlags) -> MapFlags { + let mut flags = MapFlags::PROT_READ; + if page_flags.has_write() { + flags |= MapFlags::PROT_WRITE; + } + if page_flags.has_execute() { + flags |= MapFlags::PROT_EXEC; + } + flags +} -#[derive(Debug)] -pub struct Grant { - start: VirtualAddress, - size: usize, - flags: EntryFlags, - mapped: bool, - owned: bool, - //TODO: This is probably a very heavy way to keep track of fmap'd files, perhaps move to the context? 
- pub desc_opt: Option, +pub struct UnmapResult { + pub file_desc: Option, + pub size: usize, + pub flags: MunmapFlags, } +impl UnmapResult { + pub fn unmap(mut self) -> Result<()> { + let Some(GrantFileRef { + base_offset, + description, + }) = self.file_desc.take() + else { + return Ok(()); + }; -impl Grant { - pub fn physmap(from: PhysicalAddress, to: VirtualAddress, size: usize, flags: EntryFlags) -> Grant { - let mut active_table = unsafe { ActivePageTable::new() }; + let (scheme_id, number) = match description.write() { + ref desc => (desc.scheme, desc.number), + }; - let mut flush_all = MapperFlushAll::new(); + let funmap_result = scheme::schemes() + .get(scheme_id) + .cloned() + .ok_or(Error::new(ENODEV)) + .and_then(|scheme| scheme.kfunmap(number, base_offset, self.size, self.flags)); - let start_page = Page::containing_address(to); - let end_page = Page::containing_address(VirtualAddress::new(to.get() + size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().get() - to.get() + from.get())); - let result = active_table.map_to(page, frame, flags); - flush_all.consume(result); + if let Ok(fd) = Arc::try_unwrap(description) { + fd.into_inner().try_close()?; } + funmap_result?; - flush_all.flush(&mut active_table); + Ok(()) + } +} - Grant { - start: to, - size, - flags, - mapped: true, - owned: false, - desc_opt: None, +#[derive(Debug)] +pub struct AddrSpaceWrapper { + inner: RwLock, + pub tlb_ack: AtomicU32, +} +impl AddrSpaceWrapper { + pub fn new() -> Result> { + Arc::try_new(Self { + inner: RwLock::new(AddrSpace::new()?), + tlb_ack: AtomicU32::new(0), + }) + .map_err(|_| Error::new(ENOMEM)) + } + pub fn acquire_read(&self) -> RwLockReadGuard<'_, AddrSpace> { + let my_percpu = PercpuBlock::current(); + + loop { + match self.inner.try_read() { + Some(g) => return g, + None => { + my_percpu.maybe_handle_tlb_shootdown(); + core::hint::spin_loop(); + } + } + } + } + pub fn acquire_upgradeable_read(&self) -> RwLockUpgradableGuard<'_, AddrSpace> { + let my_percpu = PercpuBlock::current(); + + loop { + match self.inner.try_upgradeable_read() { + Some(g) => return g, + None => { + my_percpu.maybe_handle_tlb_shootdown(); + core::hint::spin_loop(); + } + } } } + pub fn acquire_write(&self) -> RwLockWriteGuard<'_, AddrSpace> { + let my_percpu = PercpuBlock::current(); + + loop { + match self.inner.try_write() { + Some(g) => return g, + None => { + my_percpu.maybe_handle_tlb_shootdown(); + core::hint::spin_loop(); + } + } + } + } +} - pub fn map(to: VirtualAddress, size: usize, flags: EntryFlags) -> Grant { - let mut active_table = unsafe { ActivePageTable::new() }; +#[derive(Debug)] +pub struct AddrSpace { + pub table: Table, + pub grants: UserGrants, + pub used_by: LogicalCpuSet, + /// Lowest offset for mmap invocations where the user has not already specified the offset + /// (using MAP_FIXED/MAP_FIXED_NOREPLACE). Cf. Linux's `/proc/sys/vm/mmap_min_addr`, but with + /// the exception that we have a memory safe kernel which doesn't have to protect itself + /// against null pointers, so fixed mmaps to address zero are still allowed. + pub mmap_min: usize, +} +impl AddrSpaceWrapper { + /// Attempt to clone an existing address space so that all mappings are copied (CoW). 
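// Illustrative aside, not part of the patch: a compact summary of what the
// per-grant match in try_clone() below does for each Provider kind when an
// address space is cloned. `CloneAction` and `clone_action` are hypothetical
// names used only for this sketch.
enum CloneAction { Skip, CopyAsCow, CopyAsShared, RemapSamePhys, KeepExternalRef }
fn clone_action(provider: &Provider) -> CloneAction {
    match provider {
        // Pinned UserScheme borrows, physically contiguous driver memory and
        // fmap borrows are not carried across a clone.
        Provider::External { is_pinned_userscheme_borrow: true, .. }
        | Provider::AllocatedShared { is_pinned_userscheme_borrow: true, .. } => CloneAction::Skip,
        Provider::Allocated { phys_contiguous: true, .. } => CloneAction::Skip,
        Provider::FmapBorrowed { .. } => CloneAction::Skip,
        // Owned memory is copied CoW; shared allocations stay shared.
        Provider::Allocated { .. } => CloneAction::CopyAsCow,
        Provider::AllocatedShared { .. } => CloneAction::CopyAsShared,
        // Physical borrows are mapped again to the same frames; external
        // borrows are kept by reference to the source address space.
        Provider::PhysBorrowed { .. } => CloneAction::RemapSamePhys,
        Provider::External { .. } => CloneAction::KeepExternalRef,
    }
}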
+ pub fn try_clone(&self) -> Result> { + let mut guard = self.acquire_write(); + let guard = &mut *guard; + + let mut new_arc = AddrSpaceWrapper::new()?; + + let new = + Arc::get_mut(&mut new_arc).expect("expected new address space Arc not to be aliased"); + + let this_mapper = &mut guard.table.utable; + let mut this_flusher = Flusher::with_cpu_set(&mut guard.used_by, &self.tlb_ack); + + for (grant_base, grant_info) in guard.grants.iter() { + let new_grant = match grant_info.provider { + // No, your temporary UserScheme mappings will not be kept across forks. + Provider::External { + is_pinned_userscheme_borrow: true, + .. + } + | Provider::AllocatedShared { + is_pinned_userscheme_borrow: true, + .. + } => continue, + + // No, physically contiguous driver memory won't either. + Provider::Allocated { + phys_contiguous: true, + .. + } => continue, + + Provider::PhysBorrowed { base } => Grant::physmap( + base.clone(), + PageSpan::new(grant_base, grant_info.page_count), + grant_info.flags, + &mut new.inner.get_mut().table.utable, + &mut NopFlusher, + )?, + Provider::Allocated { + ref cow_file_ref, + phys_contiguous: false, + } => Grant::copy_mappings( + grant_base, + grant_base, + grant_info.page_count, + grant_info.flags, + this_mapper, + &mut new.inner.get_mut().table.utable, + &mut this_flusher, + &mut NopFlusher, + CopyMappingsMode::Owned { + cow_file_ref: cow_file_ref.clone(), + }, + )?, + // TODO: Merge Allocated and AllocatedShared, and make CopyMappingsMode a field? + Provider::AllocatedShared { + is_pinned_userscheme_borrow: false, + } => Grant::copy_mappings( + grant_base, + grant_base, + grant_info.page_count, + grant_info.flags, + this_mapper, + &mut new.inner.get_mut().table.utable, + &mut this_flusher, + &mut NopFlusher, + CopyMappingsMode::Borrowed, + )?, + + // MAP_SHARED grants are retained by reference, across address space clones (the + // "fork" analogue from monolithic kernels). + Provider::External { + ref address_space, + src_base, + .. + } => Grant::borrow_grant( + Arc::clone(&address_space), + src_base, + grant_base, + grant_info, + &mut new.inner.get_mut().table.utable, + &mut NopFlusher, + false, + )?, + Provider::FmapBorrowed { .. } => continue, + }; + + new.inner.get_mut().grants.insert(new_grant); + } + Ok(new_arc) + } + pub fn mprotect(&self, requested_span: PageSpan, flags: MapFlags) -> Result<()> { + let mut guard = self.acquire_write(); + let guard = &mut *guard; + + let mapper = &mut guard.table.utable; + let mut flusher = Flusher::with_cpu_set(&mut guard.used_by, &self.tlb_ack); + + // TODO: Remove allocation (might require BTreeMap::set_key or interior mutability). 
+ let regions = guard + .grants + .conflicts(requested_span) + .map(|(base, info)| { + if info.is_pinned() { + Err(Error::new(EBUSY)) + } else { + Ok(PageSpan::new(base, info.page_count)) + } + }) + .collect::>(); + + for grant_span_res in regions { + let grant_span = grant_span_res?; + + let grant = guard + .grants + .remove(grant_span.base) + .expect("grant cannot magically disappear while we hold the lock!"); + //log::info!("Mprotecting {:#?} to {:#?} in {:#?}", grant, flags, grant_span); + let intersection = grant_span.intersection(requested_span); + + let (before, mut grant, after) = grant + .extract(intersection) + .expect("failed to extract grant"); + //log::info!("Sliced into\n\n{:#?}\n\n{:#?}\n\n{:#?}", before, grant, after); + + if let Some(before) = before { + guard.grants.insert(before); + } + if let Some(after) = after { + guard.grants.insert(after); + } - let mut flush_all = MapperFlushAll::new(); + if !grant.info.can_have_flags(flags) { + guard.grants.insert(grant); + return Err(Error::new(EACCES)); + } - let start_page = Page::containing_address(to); - let end_page = Page::containing_address(VirtualAddress::new(to.get() + size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - let result = active_table.map(page, flags); - flush_all.consume(result); - } + let new_flags = grant + .info + .flags() + // TODO: Require a capability in order to map executable memory? + .execute(flags.contains(MapFlags::PROT_EXEC)) + .write(flags.contains(MapFlags::PROT_WRITE)); - flush_all.flush(&mut active_table); + // TODO: Allow enabling/disabling read access on architectures which allow it. On + // x86_64 with protection keys (although only enforced by userspace), and AArch64 (I + // think), execute-only memory is also supported. - Grant { - start: to, - size, - flags, - mapped: true, - owned: true, - desc_opt: None, + grant.remap(mapper, &mut flusher, new_flags); + //log::info!("Mprotect grant became {:#?}", grant); + guard.grants.insert(grant); } + Ok(()) + } + #[must_use = "needs to notify files"] + pub fn munmap(&self, requested_span: PageSpan, unpin: bool) -> Result> { + let mut guard = self.acquire_write(); + let guard = &mut *guard; + + let mut flusher = Flusher::with_cpu_set(&mut guard.used_by, &self.tlb_ack); + AddrSpace::munmap_inner( + &mut guard.grants, + &mut guard.table.utable, + &mut flusher, + requested_span, + unpin, + ) } + pub fn r#move( + &self, + mut src_opt: Option<(&AddrSpaceWrapper, &mut AddrSpace)>, + src_span: PageSpan, + requested_dst_base: Option, + new_page_count: usize, + new_flags: MapFlags, + notify_files: &mut Vec, + ) -> Result { + let dst_lock = self; + let mut dst = dst_lock.acquire_write(); + let dst = &mut *dst; + + let mut src_owned_opt = src_opt.as_mut().map(|(aw, a)| { + ( + &mut a.grants, + &mut a.table.utable, + Flusher::with_cpu_set(&mut a.used_by, &aw.tlb_ack), + ) + }); + let mut src_opt = src_owned_opt + .as_mut() + .map(|(g, m, f)| (&mut *g, &mut *m, &mut *f)); + let mut dst_flusher = Flusher::with_cpu_set(&mut dst.used_by, &dst_lock.tlb_ack); + + let dst_base = match requested_dst_base { + Some(base) if new_flags.contains(MapFlags::MAP_FIXED_NOREPLACE) => { + if dst + .grants + .conflicts(PageSpan::new(base, new_page_count)) + .next() + .is_some() + { + return Err(Error::new(EEXIST)); + } + + base + } + Some(base) if new_flags.contains(MapFlags::MAP_FIXED) => { + let unpin = false; + notify_files.append(&mut AddrSpace::munmap_inner( + &mut dst.grants, + &mut dst.table.utable, + &mut dst_flusher, + PageSpan::new(base, 
new_page_count), + unpin, + )?); + + base + } + _ => { + dst.grants + .find_free(dst.mmap_min, cmp::max(new_page_count, src_span.count)) + .ok_or(Error::new(ENOMEM))? + .base + } + }; - pub fn map_inactive(from: VirtualAddress, to: VirtualAddress, size: usize, flags: EntryFlags, desc_opt: Option, new_table: &mut InactivePageTable, temporary_page: &mut TemporaryPage) -> Grant { - let mut active_table = unsafe { ActivePageTable::new() }; + let (src_grants, src_mapper, src_flusher) = src_opt.as_mut().map_or( + (&mut dst.grants, &mut dst.table.utable, &mut dst_flusher), + |(g, m, f)| (&mut *g, &mut *m, &mut *f), + ); - //TODO: Do not allocate - let mut frames = VecDeque::with_capacity(size/PAGE_SIZE); + if src_grants + .conflicts(src_span) + .any(|(_, g)| !g.can_extract(false)) + { + return Err(Error::new(EBUSY)); + } + if src_grants + .conflicts(src_span) + .any(|(_, g)| !g.can_have_flags(new_flags)) + { + return Err(Error::new(EPERM)); + } + if PageSpan::new(dst_base, new_page_count).intersects(src_span) { + return Err(Error::new(EBUSY)); + } - let start_page = Page::containing_address(from); - let end_page = Page::containing_address(VirtualAddress::new(from.get() + size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - let frame = active_table.translate_page(page).expect("grant references unmapped memory"); - frames.push_back(frame); + if new_page_count < src_span.count { + let unpin = false; + notify_files.append(&mut AddrSpace::munmap_inner( + src_grants, + src_mapper, + src_flusher, + PageSpan::new( + src_span.base.next_by(new_page_count), + src_span.count - new_page_count, + ), + unpin, + )?); } - active_table.with(new_table, temporary_page, |mapper| { - let start_page = Page::containing_address(to); - let end_page = Page::containing_address(VirtualAddress::new(to.get() + size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - let frame = frames.pop_front().expect("grant did not find enough frames"); - let result = mapper.map_to(page, frame, flags); - // Ignore result due to mapping on inactive table - unsafe { result.ignore(); } + let mut remaining_src_span = PageSpan::new(src_span.base, new_page_count); + + let to_remap = src_grants + .conflicts(remaining_src_span) + .map(|(b, _)| b) + .collect::>(); + + let mut prev_grant_end = src_span.base; + + //while let Some(grant_base) = next(src_opt.as_mut().map(|s| &mut **s), dst, remaining_src_span) { + for grant_base in to_remap { + if prev_grant_end < grant_base { + let hole_page_count = grant_base.offset_from(prev_grant_end); + let hole_span = PageSpan::new( + dst_base.next_by(prev_grant_end.offset_from(src_span.base)), + hole_page_count, + ); + dst.grants.insert(Grant::zeroed( + hole_span, + page_flags(new_flags), + &mut dst.table.utable, + &mut dst_flusher, + false, + )?); } - }); - ipi(IpiKind::Tlb, IpiTarget::Other); + let (src_grants, _, _) = src_opt.as_mut().map_or( + (&mut dst.grants, &mut dst.table.utable, &mut dst_flusher), + |(g, m, f)| (&mut *g, &mut *m, &mut *f), + ); + let grant = src_grants + .remove(grant_base) + .expect("grant cannot disappear"); + let grant_span = PageSpan::new(grant.base, grant.info.page_count()); + let (before, middle, after) = grant + .extract(remaining_src_span.intersection(grant_span)) + .expect("called intersect(), must succeed"); + + if let Some(before) = before { + src_grants.insert(before); + } + if let Some(after) = after { + src_grants.insert(after); + } - Grant { - start: to, - size, - flags, - mapped: true, - owned: false, - desc_opt, + let dst_grant_base 
= dst_base.next_by(middle.base.offset_from(src_span.base)); + let middle_span = middle.span(); + + let mut src_opt = src_opt + .as_mut() + .map(|(g, m, f)| (&mut *g, &mut *m, &mut *f)); + + dst.grants.insert(match src_opt.as_mut() { + Some((_, other_mapper, other_flusher)) => middle.transfer( + dst_grant_base, + page_flags(new_flags), + other_mapper, + Some(&mut dst.table.utable), + other_flusher, + &mut dst_flusher, + )?, + None => middle.transfer( + dst_grant_base, + page_flags(new_flags), + &mut dst.table.utable, + None, + &mut dst_flusher, + &mut NopFlusher, + )?, + }); + + prev_grant_end = middle_span.base.next_by(middle_span.count); + let pages_advanced = prev_grant_end.offset_from(remaining_src_span.base); + remaining_src_span = + PageSpan::new(prev_grant_end, remaining_src_span.count - pages_advanced); } + + if prev_grant_end < src_span.base.next_by(new_page_count) { + let last_hole_span = PageSpan::new( + dst_base.next_by(prev_grant_end.offset_from(src_span.base)), + new_page_count - prev_grant_end.offset_from(src_span.base), + ); + dst.grants.insert(Grant::zeroed( + last_hole_span, + page_flags(new_flags), + &mut dst.table.utable, + &mut dst_flusher, + false, + )?); + } + + Ok(dst_base) } + /// Borrows a page from user memory, requiring that the frame be Allocated and read/write. This + /// is intended to be used for user-kernel shared memory. + pub fn borrow_frame_enforce_rw_allocated(self: &Arc, page: Page) -> Result { + let mut guard = self.acquire_write(); - /// This function should only be used in clone! - pub fn secret_clone(&self, new_start: VirtualAddress) -> Grant { - assert!(self.mapped); + let (_start_page, info) = guard.grants.contains(page).ok_or(Error::new(EINVAL))?; - let mut active_table = unsafe { ActivePageTable::new() }; + if !info.can_have_flags(MapFlags::PROT_READ | MapFlags::PROT_WRITE) { + return Err(Error::new(EPERM)); + } + if !matches!(info.provider, Provider::Allocated { .. }) { + return Err(Error::new(EPERM)); + } - let mut flush_all = MapperFlushAll::new(); + let frame = if let Some((f, fl)) = guard.table.utable.translate(page.start_address()) + && fl.has_write() + { + Frame::containing(f) + } else { + let (frame, flush, new_guard) = correct_inner(self, guard, page, AccessMode::Write, 0) + .map_err(|_| Error::new(ENOMEM))?; + flush.flush(); + guard = new_guard; + + frame + }; - let start_page = Page::containing_address(self.start); - let end_page = Page::containing_address(VirtualAddress::new(self.start.get() + self.size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - //TODO: One function to do both? 
- let flags = active_table.translate_page_flags(page).expect("grant references unmapped memory"); - let frame = active_table.translate_page(page).expect("grant references unmapped memory"); + let frame = match get_page_info(frame) + .expect("missing page info for Allocated grant") + .add_ref(RefKind::Shared) + { + Ok(_) => Ok(unsafe { RaiiFrame::new_unchecked(frame) }), + Err(AddRefError::RcOverflow) => Err(Error::new(ENOMEM)), + Err(AddRefError::SharedToCow) => unreachable!(), + Err(AddRefError::CowToShared) => unreachable!( + "if it was CoW, it was read-only, but in that case we already called correct_inner" + ), + }; + drop(guard); - let new_page = Page::containing_address(VirtualAddress::new(page.start_address().get() - self.start.get() + new_start.get())); - if self.owned { - let result = active_table.map(new_page, EntryFlags::PRESENT | EntryFlags::WRITABLE | EntryFlags::NO_EXECUTE); - flush_all.consume(result); - } else { - let result = active_table.map_to(new_page, frame, flags); - flush_all.consume(result); - } - } + frame + } +} +impl AddrSpace { + pub fn current() -> Result> { + PercpuBlock::current() + .current_addrsp + .borrow() + .clone() + .ok_or(Error::new(ESRCH)) + } + + pub fn new() -> Result { + Ok(Self { + grants: UserGrants::new(), + table: setup_new_utable()?, + mmap_min: MMAP_MIN_DEFAULT, + used_by: LogicalCpuSet::empty(), + }) + } + fn munmap_inner( + this_grants: &mut UserGrants, + this_mapper: &mut PageMapper, + this_flusher: &mut Flusher, + mut requested_span: PageSpan, + unpin: bool, + ) -> Result> { + let mut notify_files = Vec::new(); + + let next = |grants: &mut UserGrants, span: PageSpan| { + grants + .conflicts(span) + .map(|(base, info)| { + if info.is_pinned() && !unpin { + Err(Error::new(EBUSY)) + } else if !info.can_extract(unpin) { + Err(Error::new(EINVAL)) + } else { + Ok(PageSpan::new(base, info.page_count)) + } + }) + .next() + }; - flush_all.flush(&mut active_table); + while let Some(conflicting_span_res) = next(this_grants, requested_span) { + let conflicting_span = conflicting_span_res?; - if self.owned { - unsafe { - intrinsics::copy(self.start.get() as *const u8, new_start.get() as *mut u8, self.size); + let mut grant = this_grants + .remove(conflicting_span.base) + .expect("conflicting region didn't exist"); + if unpin { + grant.info.unpin(); } - let mut flush_all = MapperFlushAll::new(); + let intersection = conflicting_span.intersection(requested_span); + + requested_span = { + // In the following diagrams [---> indicates a range of + // base..base+count where the [ is at the base and > is at + // base+count. In other words, the [ is part of the range and + // the > is not part of the range. + if conflicting_span.end() < requested_span.end() { + // [------> conflicting_span + // [-------> requested_span + // [---> next requested_span + // or + // [----> conflicting_span + // [----------> requested_span + // [--> next requested_span + PageSpan::new( + conflicting_span.end(), + requested_span.end().offset_from(conflicting_span.end()), + ) + } else { + // [----------> conflicting_span + // [-----> requested_span + // next requested_span + // or + // [--------> conflicting_span + // [--------> requested_span + // next requested_span + PageSpan::empty() + } + }; - for page in Page::range_inclusive(start_page, end_page) { - //TODO: One function to do both? 
- let flags = active_table.translate_page_flags(page).expect("grant references unmapped memory"); + let (before, grant, after) = grant + .extract(intersection) + .expect("conflicting region shared no common parts"); - let new_page = Page::containing_address(VirtualAddress::new(page.start_address().get() - self.start.get() + new_start.get())); - let result = active_table.remap(new_page, flags); - flush_all.consume(result); + // Keep untouched regions + if let Some(before) = before { + this_grants.insert(before); + } + if let Some(after) = after { + this_grants.insert(after); } - flush_all.flush(&mut active_table); - } + // Remove irrelevant region + let unmap_result = grant.unmap(this_mapper, this_flusher); - Grant { - start: new_start, - size: self.size, - flags: self.flags, - mapped: true, - owned: self.owned, - desc_opt: self.desc_opt.clone() + // Notify scheme that holds grant + if unmap_result.file_desc.is_some() { + notify_files.push(unmap_result); + } } + + Ok(notify_files) + } + pub fn mmap_anywhere( + &mut self, + dst_lock: &AddrSpaceWrapper, + page_count: NonZeroUsize, + flags: MapFlags, + map: impl FnOnce(Page, PageFlags, &mut PageMapper, &mut Flusher) -> Result, + ) -> Result { + self.mmap(dst_lock, None, page_count, flags, &mut Vec::new(), map) } + pub fn mmap( + &mut self, + dst_lock: &AddrSpaceWrapper, + requested_base_opt: Option, + page_count: NonZeroUsize, + flags: MapFlags, + notify_files_out: &mut Vec, + map: impl FnOnce(Page, PageFlags, &mut PageMapper, &mut Flusher) -> Result, + ) -> Result { + debug_assert_eq!(dst_lock.inner.as_mut_ptr(), self as *mut Self); + + let selected_span = match requested_base_opt { + // TODO: Rename MAP_FIXED+MAP_FIXED_NOREPLACE to MAP_FIXED and + // MAP_FIXED_REPLACE/MAP_REPLACE? + Some(requested_base) => { + let requested_span = PageSpan::new(requested_base, page_count.get()); + + if flags.contains(MapFlags::MAP_FIXED_NOREPLACE) { + if self.grants.conflicts(requested_span).next().is_some() { + return Err(Error::new(EEXIST)); + } + requested_span + } else if flags.contains(MapFlags::MAP_FIXED) { + let unpin = false; + let mut notify_files = Self::munmap_inner( + &mut self.grants, + &mut self.table.utable, + &mut Flusher::with_cpu_set(&mut self.used_by, &dst_lock.tlb_ack), + requested_span, + unpin, + )?; + notify_files_out.append(&mut notify_files); + + requested_span + } else { + self.grants + .find_free_near(self.mmap_min, page_count.get(), Some(requested_base)) + .ok_or(Error::new(ENOMEM))? + } + } + None => self + .grants + .find_free(self.mmap_min, page_count.get()) + .ok_or(Error::new(ENOMEM))?, + }; - pub fn move_to(&mut self, new_start: VirtualAddress, new_table: &mut InactivePageTable, temporary_page: &mut TemporaryPage) { - assert!(self.mapped); + // TODO: Threads share address spaces, so not only the inactive flusher should be sending + // out IPIs. IPIs will only be sent when downgrading mappings (i.e. when a stale TLB entry + // will not be corrected by a page fault), and will furthermore require proper + // synchronization. 
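// Illustrative aside, not part of the patch: the distinction the TODO above is
// concerned with. Only downgrades (removing write or execute permission, or
// unmapping) can leave a stale TLB entry that is dangerous; a mapping upgrade
// that is still missing from a TLB is healed by an ordinary page fault.
// Hypothetical helper:
fn downgrade_requires_shootdown(old: PageFlags<RmmA>, new: PageFlags<RmmA>) -> bool {
    (old.has_write() && !new.has_write()) || (old.has_execute() && !new.has_execute())
}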
- let mut active_table = unsafe { ActivePageTable::new() }; + let grant = map( + selected_span.base, + page_flags(flags), + &mut self.table.utable, + &mut Flusher::with_cpu_set(&mut self.used_by, &dst_lock.tlb_ack), + )?; + self.grants.insert(grant); - let mut flush_all = MapperFlushAll::new(); + Ok(selected_span.base) + } +} - let start_page = Page::containing_address(self.start); - let end_page = Page::containing_address(VirtualAddress::new(self.start.get() + self.size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - //TODO: One function to do both? - let flags = active_table.translate_page_flags(page).expect("grant references unmapped memory"); - let (result, frame) = active_table.unmap_return(page, false); - flush_all.consume(result); +#[derive(Debug)] +pub struct UserGrants { + // Using a BTreeMap for it's range method. + inner: BTreeMap, + // Using a BTreeMap for it's range method. + holes: BTreeMap, + // TODO: Would an additional map ordered by (size,start) to allow for O(log n) allocations be + // beneficial? +} - active_table.with(new_table, temporary_page, |mapper| { - let new_page = Page::containing_address(VirtualAddress::new(page.start_address().get() - self.start.get() + new_start.get())); - let result = mapper.map_to(new_page, frame, flags); - // Ignore result due to mapping on inactive table - unsafe { result.ignore(); } - }); +#[derive(Clone, Copy)] +pub struct PageSpan { + pub base: Page, + pub count: usize, +} +impl PageSpan { + pub fn new(base: Page, count: usize) -> Self { + Self { base, count } + } + pub fn empty() -> Self { + Self { + base: Page::containing_address(VirtualAddress::new(0)), + count: 0, } - - flush_all.flush(&mut active_table); - - self.start = new_start; } + pub fn validate_nonempty(address: VirtualAddress, size: usize) -> Option { + Self::validate(address, size).filter(|this| !this.is_empty()) + } + pub fn validate(address: VirtualAddress, size: usize) -> Option { + if address.data() % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 { + return None; + } + if address.data().saturating_add(size) > crate::USER_END_OFFSET { + return None; + } - pub fn start_address(&self) -> VirtualAddress { - self.start + Some(Self::new( + Page::containing_address(address), + size / PAGE_SIZE, + )) + } + pub fn is_empty(&self) -> bool { + self.count == 0 + } + pub fn intersection(&self, with: PageSpan) -> PageSpan { + Self::between( + cmp::max(self.base, with.base), + cmp::min(self.end(), with.end()), + ) + } + pub fn intersects(&self, with: PageSpan) -> bool { + !self.intersection(with).is_empty() + } + pub fn slice(&self, inner_span: PageSpan) -> (Option, PageSpan, Option) { + (self.before(inner_span), inner_span, self.after(inner_span)) + } + pub fn pages(self) -> impl Iterator { + (0..self.count).map(move |i| self.base.next_by(i)) } - pub fn size(&self) -> usize { - self.size + pub fn end(&self) -> Page { + self.base.next_by(self.count) } - pub fn flags(&self) -> EntryFlags { - self.flags + /// Returns the span from the start of self until the start of the specified span. + pub fn before(self, span: Self) -> Option { + assert!(self.base <= span.base); + Some(Self::between(self.base, span.base)).filter(|reg| !reg.is_empty()) } - pub unsafe fn set_mapped(&mut self, mapped: bool) { - self.mapped = mapped; + /// Returns the span from the end of the given span until the end of self. 
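// Illustrative aside, not part of the patch: PageSpan::intersection in plain
// page-index arithmetic. For spans [a, a+n) and [b, b+m) the intersection is
// [max(a, b), min(a+n, b+m)), and the count saturates to zero when the bounds
// cross, matching the saturating_sub in PageSpan::between.
fn intersect_indices(a: (usize, usize), b: (usize, usize)) -> (usize, usize) {
    let start = a.0.max(b.0);
    let end = (a.0 + a.1).min(b.0 + b.1);
    (start, end.saturating_sub(start)) // (base index, page count)
}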
+ pub fn after(self, span: Self) -> Option { + assert!(span.end() <= self.end()); + Some(Self::between(span.end(), self.end())).filter(|reg| !reg.is_empty()) + } + /// Returns the span between two pages, `[start, end)`, truncating to zero if end < start. + pub fn between(start: Page, end: Page) -> Self { + Self::new( + start, + end.start_address() + .data() + .saturating_sub(start.start_address().data()) + / PAGE_SIZE, + ) } +} - pub fn unmap(mut self) { - assert!(self.mapped); +impl Default for UserGrants { + fn default() -> Self { + Self::new() + } +} +impl Debug for PageSpan { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!( + f, + "[{:p}:{:p}, {} pages]", + self.base.start_address().data() as *const u8, + self.base + .start_address() + .add(self.count * PAGE_SIZE - 1) + .data() as *const u8, + self.count + ) + } +} - if self.owned { - println!("Grant::unmap: leaked {:?}", self); +impl UserGrants { + pub fn new() -> Self { + Self { + inner: BTreeMap::new(), + holes: core::iter::once((VirtualAddress::new(0), crate::USER_END_OFFSET)) + .collect::>(), } + } + /// Returns the grant, if any, which occupies the specified page + pub fn contains(&self, page: Page) -> Option<(Page, &GrantInfo)> { + self.inner + .range(..=page) + .next_back() + .filter(|(base, info)| (**base..base.next_by(info.page_count)).contains(&page)) + .map(|(base, info)| (*base, info)) + } + /// Returns an iterator over all grants that occupy some part of the + /// requested region + pub fn conflicts(&self, span: PageSpan) -> impl Iterator + '_ { + let start = self.contains(span.base); + + // If there is a grant that contains the base page, start searching at the base of that + // grant, rather than the requested base here. + let start_span = start + .map(|(base, info)| PageSpan::new(base, info.page_count)) + .unwrap_or(span); + + self.inner + .range(start_span.base..) + .take_while(move |(base, info)| PageSpan::new(**base, info.page_count).intersects(span)) + .map(|(base, info)| (*base, info)) + } + // TODO: DEDUPLICATE CODE! + pub fn conflicts_mut( + &mut self, + span: PageSpan, + ) -> impl Iterator + '_ { + let start = self.contains(span.base); + + // If there is a grant that contains the base page, start searching at the base of that + // grant, rather than the requested base here. + let start_span = start + .map(|(base, info)| PageSpan::new(base, info.page_count)) + .unwrap_or(span); + + self.inner + .range_mut(start_span.base..) + .take_while(move |(base, info)| PageSpan::new(**base, info.page_count).intersects(span)) + .map(|(base, info)| (*base, info)) + } + /// Return a free region with the specified size + // TODO: Alignment (x86_64: 4 KiB, 2 MiB, or 1 GiB). + // TODO: Support finding grant close to a requested address? + pub fn find_free_near( + &self, + min: usize, + page_count: usize, + _near: Option, + ) -> Option { + // Get first available hole, but do reserve the page starting from zero as most compiled + // languages cannot handle null pointers safely even if they point to valid memory. If an + // application absolutely needs to map the 0th page, they will have to do so explicitly via + // MAP_FIXED/MAP_FIXED_NOREPLACE. + // TODO: Allow explicitly allocating guard pages? Perhaps using mprotect or mmap with + // PROT_NONE? 
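// Illustrative aside, not part of the patch: the "available size" computation
// used by the hole scan just below. A hole [off, off + len) that straddles
// mmap_min only contributes the part at or above mmap_min.
fn usable_in_hole(off: usize, len: usize, min: usize) -> usize {
    if off <= min && min <= off + len {
        len - (min - off)
    } else {
        len
    }
}
// e.g. a 16-page hole whose first 4 pages lie below mmap_min can satisfy at
// most a 12-page request.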
+ + let (hole_start, _hole_size) = self + .holes + .iter() + .skip_while(|(hole_offset, hole_size)| hole_offset.data() + **hole_size <= min) + .find(|(hole_offset, hole_size)| { + let avail_size = + if hole_offset.data() <= min && min <= hole_offset.data() + **hole_size { + **hole_size - (min - hole_offset.data()) + } else { + **hole_size + }; + page_count * PAGE_SIZE <= avail_size + })?; + // Create new region + Some(PageSpan::new( + Page::containing_address(VirtualAddress::new(cmp::max(hole_start.data(), min))), + page_count, + )) + } + pub fn find_free(&self, min: usize, page_count: usize) -> Option { + self.find_free_near(min, page_count, None) + } + fn reserve(&mut self, base: Page, page_count: usize) { + let start_address = base.start_address(); + let size = page_count * PAGE_SIZE; + let end_address = base.start_address().add(size); + + let previous_hole = self.holes.range_mut(..start_address).next_back(); - let mut active_table = unsafe { ActivePageTable::new() }; + if let Some((hole_offset, hole_size)) = previous_hole { + let prev_hole_end = hole_offset.data() + *hole_size; - let mut flush_all = MapperFlushAll::new(); + // Note that prev_hole_end cannot exactly equal start_address, since that would imply + // there is another grant at that position already, as it would otherwise have been + // larger. - let start_page = Page::containing_address(self.start); - let end_page = Page::containing_address(VirtualAddress::new(self.start.get() + self.size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - let (result, _frame) = active_table.unmap_return(page, false); - flush_all.consume(result); + if prev_hole_end > start_address.data() { + // hole_offset must be below (but never equal to) the start address due to the + // `..start_address()` limit; hence, all we have to do is to shrink the + // previous offset. + *hole_size = start_address.data() - hole_offset.data(); + } + if prev_hole_end > end_address.data() { + // The grant is splitting this hole in two, so insert the new one at the end. + self.holes + .insert(end_address, prev_hole_end - end_address.data()); + } } - flush_all.flush(&mut active_table); + // Next hole + if let Some(hole_size) = self.holes.remove(&start_address) { + let remainder = hole_size - size; + if remainder > 0 { + self.holes.insert(end_address, remainder); + } + } + } + fn unreserve(holes: &mut BTreeMap, base: Page, page_count: usize) { + // TODO + let start_address = base.start_address(); + let size = page_count * PAGE_SIZE; + let end_address = base.start_address().add(size); + + // The size of any possible hole directly after the to-be-freed region. + let exactly_after_size = holes.remove(&end_address); + + // There was a range that began exactly prior to the to-be-freed region, so simply + // increment the size such that it occupies the grant too. If in addition there was a grant + // directly after the grant, include it too in the size. + if let Some((hole_offset, hole_size)) = holes + .range_mut(..start_address) + .next_back() + .filter(|(offset, size)| offset.data() + **size == start_address.data()) + { + *hole_size = end_address.data() - hole_offset.data() + exactly_after_size.unwrap_or(0); + } else { + // There was no free region directly before the to-be-freed region, however will + // now unconditionally insert a new free region where the grant was, and add that extra + // size if there was something after it. 
+ holes.insert(start_address, size + exactly_after_size.unwrap_or(0)); + } + } + pub fn insert(&mut self, mut grant: Grant) { + assert!(self + .conflicts(PageSpan::new(grant.base, grant.info.page_count)) + .next() + .is_none()); + self.reserve(grant.base, grant.info.page_count); + + let before_region = self + .inner + .range(..grant.base) + .next_back() + .filter(|(base, info)| { + base.next_by(info.page_count) == grant.base + && info.can_be_merged_if_adjacent(&grant.info) + }) + .map(|(base, info)| (*base, info.page_count)); + + let after_region = self + .inner + .range(grant.span().end()..) + .next() + .filter(|(base, info)| { + **base == grant.base.next_by(grant.info.page_count) + && info.can_be_merged_if_adjacent(&grant.info) + }) + .map(|(base, info)| (*base, info.page_count)); + + if let Some((before_base, before_page_count)) = before_region { + grant.base = before_base; + grant.info.page_count += before_page_count; + + core::mem::forget(self.inner.remove(&before_base)); + } + if let Some((after_base, after_page_count)) = after_region { + grant.info.page_count += after_page_count; - if let Some(desc) = self.desc_opt.take() { - //TODO: This imposes a large cost on unmapping, but that cost cannot be avoided without modifying fmap and funmap - let _ = desc.close(); + core::mem::forget(self.inner.remove(&after_base)); } - self.mapped = false; + self.inner.insert(grant.base, grant.info); + } + pub fn remove(&mut self, base: Page) -> Option { + let info = self.inner.remove(&base)?; + Self::unreserve(&mut self.holes, base, info.page_count); + Some(Grant { base, info }) } + pub fn iter(&self) -> impl Iterator + '_ { + self.inner.iter().map(|(base, info)| (*base, info)) + } + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + pub fn into_iter(self) -> impl Iterator { + self.inner + .into_iter() + .map(|(base, info)| Grant { base, info }) + } +} + +#[derive(Debug)] +pub struct GrantInfo { + page_count: usize, + flags: PageFlags, + // TODO: Rename to unmapped? + mapped: bool, + pub(crate) provider: Provider, +} - pub fn unmap_inactive(mut self, new_table: &mut InactivePageTable, temporary_page: &mut TemporaryPage) { - assert!(self.mapped); +/// Enumeration of various types of grants. +#[derive(Debug)] +pub enum Provider { + /// The grant is owned, but possibly CoW-shared. + /// + /// The pages this grant spans, need not necessarily be initialized right away, and can be + /// populated either from zeroed frames, the CoW zeroed frame, or from a scheme fmap call, if + /// mapped with MAP_LAZY. All frames must have an available PageInfo. + Allocated { + cow_file_ref: Option, + phys_contiguous: bool, + }, + + /// The grant is owned, but possibly shared. + /// + /// The pages may only be lazily initialized, if the address space has not yet been cloned (when forking). + /// + /// This type of grants is obtained from MAP_SHARED anonymous or `memory:` mappings, i.e. + /// allocated memory that remains shared after address space clones. + AllocatedShared { is_pinned_userscheme_borrow: bool }, + + /// The grant is not owned, but borrowed from physical memory frames that do not belong to the + /// frame allocator. The kernel will forbid borrowing any physical memory range, that the + /// memory map has indicated is regular allocatable RAM. + PhysBorrowed { base: Frame }, + + /// The memory is borrowed directly from another address space. + External { + address_space: Arc, + src_base: Page, + is_pinned_userscheme_borrow: bool, + }, + + /// The memory is MAP_SHARED borrowed from a scheme. 
+ /// + /// Since the address space is not tracked here, all nonpresent pages must be present before + /// the fmap operation completes, unless MAP_LAZY is specified. They are tracked using + /// PageInfo, or treated as PhysBorrowed if any frame lacks a PageInfo. + FmapBorrowed { + file_ref: GrantFileRef, + pin_refcount: usize, + }, +} - if self.owned { - println!("Grant::unmap_inactive: leaked {:?}", self); - } +#[derive(Debug)] +pub struct Grant { + pub(crate) base: Page, + pub(crate) info: GrantInfo, +} - let mut active_table = unsafe { ActivePageTable::new() }; +#[derive(Clone, Debug)] +pub struct GrantFileRef { + pub description: Arc>, + pub base_offset: usize, +} - active_table.with(new_table, temporary_page, |mapper| { - let start_page = Page::containing_address(self.start); - let end_page = Page::containing_address(VirtualAddress::new(self.start.get() + self.size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - let (result, _frame) = mapper.unmap_return(page, false); - // This is not the active table, so the flush can be ignored - unsafe { result.ignore(); } - } - }); +impl Grant { + // TODO: PageCount newtype, to avoid confusion between bytes and pages? - ipi(IpiKind::Tlb, IpiTarget::Other); + // `base` must be mapped by the caller. + pub fn allocated_one_page_nomap(base: Page, flags: PageFlags) -> Grant { + Grant { + base, + info: GrantInfo { + page_count: 1, + flags, + mapped: true, + provider: Provider::Allocated { + cow_file_ref: None, + phys_contiguous: false, + }, + }, + } + } - if let Some(desc) = self.desc_opt.take() { - //TODO: This imposes a large cost on unmapping, but that cost cannot be avoided without modifying fmap and funmap - let _ = desc.close(); + // TODO: is_pinned + pub fn allocated_shared_one_page( + frame: Frame, + page: Page, + flags: PageFlags, + mapper: &mut PageMapper, + flusher: &mut Flusher, + is_pinned: bool, + ) -> Result { + let info = get_page_info(frame).expect("needs page info"); + + // TODO: + // + // This may not necessarily hold, as even pinned memory can remain shared (e.g. proc: + // borrow), but it would probably be possible to forbid borrowing memory there as well. + // Maybe make it exclusive first using cow(), unless that is too expensive. + // + // assert_eq!(info.refcount(), RefCount::One); + + // Semantically, the page will be shared between the "context struct" and whatever + // else. + info.add_ref(RefKind::Shared) + .expect("must be possible if previously Zero"); + + unsafe { + mapper + .map_phys(page.start_address(), frame.base(), flags) + .ok_or(Error::new(ENOMEM))? 
+ .ignore(); + + flusher.queue(frame, None, TlbShootdownActions::NEW_MAPPING); } - self.mapped = false; + Ok(Grant { + base: page, + info: GrantInfo { + page_count: 1, + flags, + mapped: true, + provider: Provider::AllocatedShared { + is_pinned_userscheme_borrow: is_pinned, + }, + }, + }) } -} -impl Drop for Grant { - fn drop(&mut self) { - assert!(!self.mapped); + pub fn physmap( + phys: Frame, + span: PageSpan, + flags: PageFlags, + mapper: &mut PageMapper, + flusher: &mut impl GenericFlusher, + ) -> Result { + const MAX_EAGER_PAGES: usize = 4096; + + for i in 0..span.count { + if let Some(info) = get_page_info(phys.next_by(i)) { + log::warn!("Driver tried to physmap the allocator-frame {phys:?} (info {info:?})!"); + return Err(Error::new(EPERM)); + } + } + + for (i, page) in span.pages().enumerate().take(MAX_EAGER_PAGES) { + let frame = phys.next_by(i); + unsafe { + let Some(result) = + mapper.map_phys(page.start_address(), frame.base(), flags.write(false)) + else { + break; + }; + result.ignore(); + + flusher.queue(frame, None, TlbShootdownActions::NEW_MAPPING); + } + } + + Ok(Grant { + base: span.base, + info: GrantInfo { + page_count: span.count, + flags, + mapped: true, + provider: Provider::PhysBorrowed { base: phys }, + }, + }) } -} + pub fn zeroed_phys_contiguous( + span: PageSpan, + flags: PageFlags, + mapper: &mut PageMapper, + flusher: &mut Flusher, + ) -> Result { + if !span.count.is_power_of_two() { + log::warn!("Attempted non-power-of-two zeroed_phys_contiguous allocation, rounding up to next power of two."); + } -#[derive(Clone, Debug)] -pub enum SharedMemory { - Owned(Arc>), - Borrowed(Weak>) -} + let alloc_order = span.count.next_power_of_two().trailing_zeros(); + let base = crate::memory::allocate_p2frame(alloc_order).ok_or(Enomem)?; + + for (i, page) in span.pages().enumerate() { + let frame = base.next_by(i); + + get_page_info(frame) + .expect("PageInfo must exist for allocated frame") + .refcount + .store(RefCount::One.to_raw(), Ordering::Relaxed); + + unsafe { + let result = mapper + .map_phys(page.start_address(), frame.base(), flags) + .expect("TODO: page table OOM"); + result.ignore(); -impl SharedMemory { - pub fn with(&self, f: F) -> T where F: FnOnce(&mut Memory) -> T { - match *self { - SharedMemory::Owned(ref memory_lock) => { - let mut memory = memory_lock.lock(); - f(&mut *memory) + flusher.queue(frame, None, TlbShootdownActions::NEW_MAPPING); + } + } + + Ok(Grant { + base: span.base, + info: GrantInfo { + page_count: span.count, + flags, + mapped: true, + provider: Provider::Allocated { + cow_file_ref: None, + phys_contiguous: true, + }, }, - SharedMemory::Borrowed(ref memory_weak) => { - let memory_lock = memory_weak.upgrade().expect("SharedMemory::Borrowed no longer valid"); - let mut memory = memory_lock.lock(); - f(&mut *memory) + }) + } + pub fn zeroed( + span: PageSpan, + flags: PageFlags, + mapper: &mut PageMapper, + flusher: &mut Flusher, + shared: bool, + ) -> Result { + const MAX_EAGER_PAGES: usize = 16; + + let (the_frame, the_frame_info) = the_zeroed_frame(); + + // TODO: Use flush_all after a certain number of pages, otherwise no + + for page in span.pages().take(MAX_EAGER_PAGES) { + // Good thing with lazy page fault handlers, is that if we fail due to ENOMEM here, we + // can continue and let the process face the OOM killer later. 
+ unsafe { + the_frame_info + .add_ref(RefKind::Cow) + .expect("the static zeroed frame cannot be shared!"); + + let Some(result) = + mapper.map_phys(page.start_address(), the_frame.base(), flags.write(false)) + else { + break; + }; + result.ignore(); + flusher.queue(the_frame, None, TlbShootdownActions::NEW_MAPPING); } } + + Ok(Grant { + base: span.base, + info: GrantInfo { + page_count: span.count, + flags, + mapped: true, + provider: if shared { + Provider::AllocatedShared { + is_pinned_userscheme_borrow: false, + } + } else { + Provider::Allocated { + cow_file_ref: None, + phys_contiguous: false, + } + }, + }, + }) + } + + // XXX: borrow_grant is needed because of the borrow checker (iterator invalidation), maybe + // borrow_grant/borrow can be abstracted somehow? + pub fn borrow_grant( + src_address_space_lock: Arc, + src_base: Page, + dst_base: Page, + src_info: &GrantInfo, + _mapper: &mut PageMapper, + _dst_flusher: &mut impl GenericFlusher, + _eager: bool, + ) -> Result { + Ok(Grant { + base: dst_base, + info: GrantInfo { + page_count: src_info.page_count, + flags: src_info.flags, + mapped: true, + provider: Provider::External { + src_base, + address_space: src_address_space_lock, + is_pinned_userscheme_borrow: false, + }, + }, + }) } - pub fn borrow(&self) -> SharedMemory { - match *self { - SharedMemory::Owned(ref memory_lock) => SharedMemory::Borrowed(Arc::downgrade(memory_lock)), - SharedMemory::Borrowed(ref memory_lock) => SharedMemory::Borrowed(memory_lock.clone()) + pub fn borrow_fmap( + span: PageSpan, + new_flags: PageFlags, + file_ref: GrantFileRef, + src: Option>, + lock: &AddrSpaceWrapper, + mapper: &mut PageMapper, + flusher: &mut Flusher, + ) -> Result { + if let Some(src) = src { + let mut guard = src.addr_space_guard; + let mut src_addrspace = &mut *guard; + let mut src_flusher_state = + Flusher::with_cpu_set(&mut src_addrspace.used_by, &lock.tlb_ack).detach(); + for dst_page in span.pages() { + let src_page = src.src_base.next_by(dst_page.offset_from(span.base)); + + let (frame, is_cow) = match src.mode { + MmapMode::Shared => { + // TODO: Error code for "scheme responded with unmapped page"? 
+ let frame = match src_addrspace + .table + .utable + .translate(src_page.start_address()) + { + Some((phys, _)) => Frame::containing(phys), + // TODO: ensure the correct context is hardblocked, if necessary + None => { + let (frame, _, new_guard) = correct_inner( + src.addr_space_lock, + guard, + src_page, + AccessMode::Read, + 0, + ) + .map_err(|_| Error::new(EIO))?; + guard = new_guard; + frame + } + }; + + (frame, false) + } + MmapMode::Cow => unsafe { + let frame = match guard + .table + .utable + .remap_with(src_page.start_address(), |flags| flags.write(false)) + { + Some((_, phys, _)) => Frame::containing(phys), + // TODO: ensure the correct context is hardblocked, if necessary + None => { + let (frame, _, new_guard) = correct_inner( + src.addr_space_lock, + guard, + src_page, + AccessMode::Read, + 0, + ) + .map_err(|_| Error::new(EIO))?; + guard = new_guard; + frame + } + }; + + (frame, true) + }, + }; + src_addrspace = &mut *guard; + + let frame = if let Some(page_info) = get_page_info(frame) { + match page_info.add_ref(RefKind::Shared) { + Ok(()) => frame, + Err(AddRefError::CowToShared) => unsafe { + let CowResult { + new_frame: new_cow_frame, + old_frame, + } = cow(frame, page_info, RefKind::Shared) + .map_err(|_| Error::new(ENOMEM))?; + + let (old_flags, _, _flush) = src_addrspace + .table + .utable + .remap_with_full(src_page.start_address(), |_, flags| { + (new_cow_frame.base(), flags) + }) + .expect("page did exist"); + + // TODO: flush.ignore() is correct, but seems to be amplifying a + // userspace race condition + // + //flush.ignore(); + + let mut src_flusher = Flusher { + active_cpus: &mut src_addrspace.used_by, + state: src_flusher_state, + }; + src_flusher.queue( + frame, + None, + TlbShootdownActions::change_of_flags(old_flags, new_flags), + ); + + if let Some(old_frame) = old_frame { + src_flusher.queue(old_frame, None, TlbShootdownActions::FREE); + } + src_flusher_state = src_flusher.detach(); + + // TODO: there used to be an additional remove_ref here, was that + // correct? + + new_cow_frame + }, + Err(AddRefError::SharedToCow) => unreachable!(), + Err(AddRefError::RcOverflow) => return Err(Error::new(ENOMEM)), + } + } else { + frame + }; + + unsafe { + let flush = mapper + .map_phys( + dst_page.start_address(), + frame.base(), + new_flags.write(new_flags.has_write() && !is_cow), + ) + .unwrap(); + flush.ignore(); + + flusher.queue(frame, None, TlbShootdownActions::NEW_MAPPING); + } + } } + + Ok(Self { + base: span.base, + info: GrantInfo { + page_count: span.count, + mapped: true, + flags: new_flags, + provider: Provider::FmapBorrowed { + file_ref, + pin_refcount: 0, + }, + }, + }) } -} -#[derive(Debug)] -pub struct Memory { - start: VirtualAddress, - size: usize, - flags: EntryFlags -} - -impl Memory { - pub fn new(start: VirtualAddress, size: usize, flags: EntryFlags, clear: bool) -> Self { - let mut memory = Memory { - start: start, - size: size, - flags: flags + /// Borrow all pages in the range `[src_base, src_base+page_count)` from `src_address_space`, + /// mapping them into `[dst_base, dst_base+page_count)`. The destination pages will lazily read + /// the page tables of the source pages, but once present in the destination address space, + /// pages that are unmaped or moved will not be made visible to the destination address space. 
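// Illustrative aside, not part of the patch: the contiguity requirement that
// the conflicts_mut loop below enforces on the source span. The borrowed range
// must be fully covered by adjacent grants: the first grant may not start past
// src_base, consecutive grants may not leave a hole, and the last grant must
// reach the end of the span. Hypothetical helper over (base, page_count) pairs
// in iteration order:
fn covers_contiguously(src: PageSpan, grants: &[(Page, usize)]) -> bool {
    let mut expected = src.base;
    for &(base, count) in grants {
        if base > expected {
            return false; // starts past the span base, or leaves a hole
        }
        expected = base.next_by(count);
    }
    expected >= src.end()
}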
+ pub fn borrow( + src_address_space_lock: Arc, + src_address_space: &mut AddrSpace, + src_base: Page, + dst_base: Page, + page_count: usize, + map_flags: MapFlags, + dst_mapper: &mut PageMapper, + dst_flusher: &mut Flusher, + eager: bool, + _allow_phys: bool, + is_pinned_userscheme_borrow: bool, + ) -> Result { + let flags = page_flags(map_flags); + + const MAX_EAGER_PAGES: usize = 4096; + + let src_span = PageSpan::new(src_base, page_count); + let mut prev_span = None; + + for (src_grant_base, src_grant) in src_address_space.grants.conflicts_mut(src_span) { + let grant_span = PageSpan::new(src_grant_base, src_grant.page_count); + let prev_span = prev_span.replace(grant_span); + + if prev_span.is_none() && src_grant_base > src_base { + log::warn!( + "Grant too far away, prev_span {:?} src_base {:?} grant base {:?} grant {:#?}", + prev_span, + src_base, + src_grant_base, + src_grant + ); + return Err(Error::new(EINVAL)); + } else if let Some(prev) = prev_span + && prev.end() != src_grant_base + { + log::warn!( + "Hole between grants, prev_span {:?} src_base {:?} grant base {:?} grant {:#?}", + prev_span, + src_base, + src_grant_base, + src_grant + ); + return Err(Error::new(EINVAL)); + } + + if !src_grant.can_have_flags(map_flags) { + return Err(Error::new(EPERM)); + } + + if let Provider::FmapBorrowed { + ref mut pin_refcount, + .. + } = src_grant.provider + { + *pin_refcount += 1; + } + } + + let Some(last_span) = prev_span else { + log::warn!("Called Grant::borrow, but no grants were there!"); + return Err(Error::new(EINVAL)); }; - memory.map(clear); + if last_span.end() < src_span.end() { + log::warn!("Requested end page too far away from last grant"); + return Err(Error::new(EINVAL)); + } + if eager { + for (i, page) in PageSpan::new(src_base, page_count) + .pages() + .enumerate() + .take(MAX_EAGER_PAGES) + { + let Some((phys, _)) = src_address_space + .table + .utable + .translate(page.start_address()) + else { + continue; + }; + + let writable = match get_page_info(Frame::containing(phys)) { + None => true, + Some(i) => { + if i.add_ref(RefKind::Shared).is_err() { + continue; + }; + + i.allows_writable() + } + }; + + unsafe { + let flush = dst_mapper + .map_phys( + dst_base.next_by(i).start_address(), + phys, + flags.write(flags.has_write() && writable), + ) + .ok_or(Error::new(ENOMEM))?; + flush.ignore(); + + dst_flusher.queue( + Frame::containing(phys), + None, + TlbShootdownActions::NEW_MAPPING, + ); + } + } + } - memory + Ok(Grant { + base: dst_base, + info: GrantInfo { + page_count, + flags, + mapped: true, + provider: Provider::External { + address_space: src_address_space_lock, + src_base, + is_pinned_userscheme_borrow, + }, + }, + }) } + pub fn copy_mappings( + src_base: Page, + dst_base: Page, + page_count: usize, + flags: PageFlags, + src_mapper: &mut PageMapper, + dst_mapper: &mut PageMapper, + src_flusher: &mut Flusher, + dst_flusher: &mut impl GenericFlusher, + mode: CopyMappingsMode, + ) -> Result { + let (allows_writable, rk) = match mode { + CopyMappingsMode::Owned { .. 
} => (false, RefKind::Cow), + CopyMappingsMode::Borrowed => (true, RefKind::Shared), + }; - pub fn to_shared(self) -> SharedMemory { - SharedMemory::Owned(Arc::new(Mutex::new(self))) + // TODO: Page table iterator + for page_idx in 0..page_count { + let src_page = src_base.next_by(page_idx); + let dst_page = dst_base.next_by(page_idx).start_address(); + + let src_frame = match rk { + RefKind::Cow => { + let Some((_, phys, flush)) = (unsafe { + src_mapper.remap_with(src_page.start_address(), |flags| flags.write(false)) + }) else { + // Page is not mapped, let the page fault handler take care of that (initializing + // it to zero). + // + // TODO: If eager, allocate zeroed page if writable, or use *the* zeroed page (also + // for read-only)? + continue; + }; + unsafe { + flush.ignore(); + } + let frame = Frame::containing(phys); + src_flusher.queue(frame, None, TlbShootdownActions::REVOKE_WRITE); + frame + } + RefKind::Shared => { + if let Some((phys, _)) = src_mapper.translate(src_page.start_address()) { + Frame::containing(phys) + } else { + // TODO: Omit the unnecessary subsequent add_ref call. + let new_frame = init_frame(RefCount::One).expect("TODO: handle OOM"); + let src_flush = unsafe { + src_mapper + .map_phys(src_page.start_address(), new_frame.base(), flags) + .expect("TODO: handle OOM") + }; + unsafe { + src_flush.ignore(); + } + src_flusher.queue(new_frame, None, TlbShootdownActions::NEW_MAPPING); + + new_frame + } + } + }; + + let src_frame = { + let src_page_info = get_page_info(src_frame) + .expect("allocated page was not present in the global page array"); + + match src_page_info.add_ref(rk) { + Ok(()) => src_frame, + Err(AddRefError::CowToShared) => { + let CowResult { + new_frame, + old_frame, + } = cow(src_frame, src_page_info, rk).map_err(|_| Enomem)?; + if let Some(old_frame) = old_frame { + src_flusher.queue(old_frame, None, TlbShootdownActions::FREE); + } + + // TODO: Flusher + unsafe { + if let Some((_flags, phys, flush)) = src_mapper + .remap_with_full(src_page.start_address(), |_, f| { + (new_frame.base(), f) + }) + { + // TODO: flush.ignore() is correct, but seems to be amplifying a + // userspace race condition + // + //flush.ignore(); + flush.flush(); + + // FIXME: Is MOVE correct? + src_flusher.queue( + Frame::containing(phys), + None, + TlbShootdownActions::MOVE, + ); + } + } + + new_frame + } + // Cannot be shared and CoW simultaneously. + Err(AddRefError::SharedToCow) => { + // The call to cow() later implicitly removes one ref, so add it here + // first, even if Shared. + if src_page_info.add_ref(RefKind::Shared) == Err(AddRefError::RcOverflow) { + return Err(Enomem); + } + + // TODO: Copy in place, or use a zeroed page? 
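// --- Illustrative sketch (editorial addition, not part of this patch) ---
// The add_ref/AddRefError handling above follows a small state machine: a frame
// is exclusively owned, CoW-shared, or read/write-shared, and the two shared
// kinds cannot be mixed without copying first. A simplified standalone model
// (the kernel's real PageInfo uses an atomic encoding, not this enum):
#[derive(Clone, Copy)]
enum RefState { One, Cow(usize), Shared(usize) }

#[derive(Debug, PartialEq)]
enum AddRefErr { CowToShared, SharedToCow, Overflow }

fn add_ref(state: RefState, want_cow: bool) -> Result<RefState, AddRefErr> {
    match (state, want_cow) {
        // An exclusively owned frame can become either kind of shared frame.
        (RefState::One, true) => Ok(RefState::Cow(2)),
        (RefState::One, false) => Ok(RefState::Shared(2)),
        // Adding another reference of the same kind just bumps the count.
        (RefState::Cow(n), true) => n.checked_add(1).map(RefState::Cow).ok_or(AddRefErr::Overflow),
        (RefState::Shared(n), false) => n.checked_add(1).map(RefState::Shared).ok_or(AddRefErr::Overflow),
        // Mixing kinds is reported back so the caller can copy the frame first.
        (RefState::Cow(_), false) => Err(AddRefErr::CowToShared),
        (RefState::Shared(_), true) => Err(AddRefErr::SharedToCow),
    }
}
// --- end of sketch ---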
+ let CowResult { + new_frame, + old_frame, + } = cow(src_frame, src_page_info, rk).map_err(|_| Enomem)?; + if let Some(old_frame) = old_frame { + src_flusher.queue(old_frame, None, TlbShootdownActions::FREE); + } + new_frame + } + Err(AddRefError::RcOverflow) => return Err(Enomem), + } + }; + + let Some(map_result) = (unsafe { + dst_mapper.map_phys( + dst_page, + src_frame.base(), + flags.write(flags.has_write() && allows_writable), + ) + }) else { + break; + }; + unsafe { + map_result.ignore(); + } + + dst_flusher.queue(src_frame, None, TlbShootdownActions::NEW_MAPPING); + } + + Ok(Grant { + base: dst_base, + info: GrantInfo { + page_count, + flags, + mapped: true, + provider: match mode { + CopyMappingsMode::Owned { cow_file_ref } => Provider::Allocated { + cow_file_ref, + phys_contiguous: false, + }, + CopyMappingsMode::Borrowed => Provider::AllocatedShared { + is_pinned_userscheme_borrow: false, + }, + }, + }, + }) } + /// Move a grant between two address spaces. + pub fn transfer( + mut self, + dst_base: Page, + flags: PageFlags, + src_mapper: &mut PageMapper, + mut dst_mapper: Option<&mut PageMapper>, + src_flusher: &mut Flusher, + dst_flusher: &mut impl GenericFlusher, + ) -> Result { + assert!(!self.info.is_pinned()); + + for src_page in self.span().pages() { + let dst_page = dst_base.next_by(src_page.offset_from(self.base)); + + let unmap_parents = true; + + // TODO: Validate flags? + let Some((phys, _flags, flush)) = + (unsafe { src_mapper.unmap_phys(src_page.start_address(), unmap_parents) }) + else { + continue; + }; + unsafe { + flush.ignore(); + } + src_flusher.queue(Frame::containing(phys), None, TlbShootdownActions::MOVE); + + let dst_mapper = dst_mapper.as_deref_mut().unwrap_or(&mut *src_mapper); + + // TODO: Preallocate to handle OOM? + let flush = unsafe { + dst_mapper + .map_phys(dst_page.start_address(), phys, flags) + .expect("TODO: OOM") + }; + unsafe { + flush.ignore(); + } + dst_flusher.queue( + Frame::containing(phys), + None, + TlbShootdownActions::NEW_MAPPING, + ); + } - pub fn start_address(&self) -> VirtualAddress { - self.start + self.base = dst_base; + Ok(self) } - pub fn size(&self) -> usize { - self.size + // Caller must check this doesn't violate access rights for e.g. shared memory. + pub fn remap( + &mut self, + mapper: &mut PageMapper, + flusher: &mut Flusher, + flags: PageFlags, + ) { + assert!(self.info.mapped); + + for page in self.span().pages() { + unsafe { + // Lazy mappings don't require remapping, as info.flags will be updated. + let Some((old_flags, phys, flush)) = + mapper.remap_with(page.start_address(), |_| flags) + else { + continue; + }; + flush.ignore(); + //log::info!("Remapped page {:?} (frame {:?})", page, Frame::containing(mapper.translate(page.start_address()).unwrap().0)); + flusher.queue( + Frame::containing(phys), + None, + TlbShootdownActions::change_of_flags(old_flags, flags), + ); + } + } + + self.info.flags = flags; } + #[must_use = "will not unmap itself"] + pub fn unmap( + mut self, + mapper: &mut PageMapper, + flusher: &mut impl GenericFlusher, + ) -> UnmapResult { + assert!(self.info.mapped); + assert!(!self.info.is_pinned()); + + if let Provider::External { + ref address_space, + src_base, + .. + } = self.info.provider + { + let mut guard = address_space.acquire_write(); + + for (_, grant) in guard + .grants + .conflicts_mut(PageSpan::new(src_base, self.info.page_count)) + { + match grant.provider { + Provider::FmapBorrowed { + ref mut pin_refcount, + .. 
+ } => { + *pin_refcount = pin_refcount + .checked_sub(1) + .expect("fmap pinning code is wrong") + } + _ => continue, + } + } + } - pub fn flags(&self) -> EntryFlags { - self.flags + let is_phys_contiguous = matches!( + self.info.provider, + Provider::Allocated { + phys_contiguous: true, + .. + } + ); + + // TODO: Add old debug assertions back, into Flusher. + let is_fmap_shared = match self.info.provider { + Provider::Allocated { .. } => Some(false), + Provider::AllocatedShared { .. } => None, + Provider::External { .. } => None, + Provider::PhysBorrowed { .. } => None, + Provider::FmapBorrowed { .. } => Some(true), + }; + + if is_phys_contiguous { + let (phys_base, _) = mapper.translate(self.base.start_address()).unwrap(); + let base_frame = Frame::containing(phys_base); + + for i in 0..self.info.page_count { + unsafe { + let (phys, _, flush) = mapper + .unmap_phys(self.base.next_by(i).start_address(), true) + .expect("all physborrowed grants must be fully Present in the page tables"); + flush.ignore(); + + assert_eq!(phys, base_frame.next_by(i).base()); + } + } + + flusher.queue( + base_frame, + Some(NonZeroUsize::new(self.info.page_count).unwrap()), + TlbShootdownActions::FREE, + ); + } else { + for page in self.span().pages() { + // Lazy mappings do not need to be unmapped. + let Some((phys, _, flush)) = + (unsafe { mapper.unmap_phys(page.start_address(), true) }) + else { + continue; + }; + unsafe { + flush.ignore(); + } + + flusher.queue(Frame::containing(phys), None, TlbShootdownActions::FREE); + } + } + + self.info.mapped = false; + + // Dummy value, won't be read. + let provider = core::mem::replace( + &mut self.info.provider, + Provider::AllocatedShared { + is_pinned_userscheme_borrow: false, + }, + ); + + let mut munmap_flags = MunmapFlags::empty(); + munmap_flags.set( + MunmapFlags::NEEDS_SYNC, + is_fmap_shared.unwrap_or(false) && self.info.flags.has_write(), + ); + + UnmapResult { + size: self.info.page_count * PAGE_SIZE, + file_desc: match provider { + Provider::Allocated { cow_file_ref, .. } => cow_file_ref, + Provider::FmapBorrowed { file_ref, .. } => Some(file_ref), + _ => None, + }, + flags: munmap_flags, + } } - pub fn pages(&self) -> PageIter { - let start_page = Page::containing_address(self.start); - let end_page = Page::containing_address(VirtualAddress::new(self.start.get() + self.size - 1)); - Page::range_inclusive(start_page, end_page) + /// Extract out a region into a separate grant. The return value is as + /// follows: (before, new split, after). Before and after may be `None`, + /// which occurs when the split off region is at the start or end of the + /// page respectively. + /// + /// # Panics + /// + /// Panics if the start or end addresses of the region is not aligned to the + /// page size. To round up the size to the nearest page size, use `.round()` + /// on the region. + /// + /// Also panics if the given region isn't completely contained within the + /// grant. Use `grant.intersect` to find a sub-region that works. 
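// --- Illustrative sketch (editorial addition, not part of this patch) ---
// A rough standalone model of the (before, split, after) slicing described in
// the doc comment above, with a span reduced to a hypothetical
// (base_page, page_count) pair:
fn slice_span(
    this: (usize, usize),
    requested: (usize, usize),
) -> (Option<(usize, usize)>, (usize, usize), Option<(usize, usize)>) {
    let (this_base, this_count) = this;
    let (req_base, req_count) = requested;
    // The requested region must lie entirely within the grant; panic otherwise,
    // mirroring the documented behavior.
    assert!(req_base >= this_base && req_base + req_count <= this_base + this_count);

    let before_count = req_base - this_base;
    let after_count = (this_base + this_count) - (req_base + req_count);

    let before = (before_count != 0).then_some((this_base, before_count));
    let after = (after_count != 0).then_some((req_base + req_count, after_count));
    (before, requested, after)
}
// --- end of sketch ---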
+ pub fn span(&self) -> PageSpan { + PageSpan::new(self.base, self.info.page_count) } + pub fn extract(mut self, span: PageSpan) -> Option<(Option, Grant, Option)> { + assert!(self.info.can_extract(false)); + + let (before_span, this_span, after_span) = self.span().slice(span); + + let before_grant = before_span.map(|span| Grant { + base: span.base, + info: GrantInfo { + flags: self.info.flags, + mapped: self.info.mapped, + page_count: span.count, + provider: match self.info.provider { + Provider::External { + ref address_space, + src_base, + .. + } => Provider::External { + address_space: Arc::clone(address_space), + src_base, + is_pinned_userscheme_borrow: false, + }, + Provider::Allocated { + ref cow_file_ref, .. + } => Provider::Allocated { + cow_file_ref: cow_file_ref.clone(), + phys_contiguous: false, + }, + Provider::AllocatedShared { .. } => Provider::AllocatedShared { + is_pinned_userscheme_borrow: false, + }, + Provider::PhysBorrowed { base } => { + Provider::PhysBorrowed { base: base.clone() } + } + Provider::FmapBorrowed { ref file_ref, .. } => Provider::FmapBorrowed { + file_ref: file_ref.clone(), + pin_refcount: 0, + }, + }, + }, + }); + + let middle_page_offset = before_grant.as_ref().map_or(0, |g| g.info.page_count); - fn map(&mut self, clear: bool) { - let mut active_table = unsafe { ActivePageTable::new() }; + match self.info.provider { + Provider::PhysBorrowed { ref mut base } => *base = base.next_by(middle_page_offset), + Provider::FmapBorrowed { + ref mut file_ref, .. + } + | Provider::Allocated { + cow_file_ref: Some(ref mut file_ref), + .. + } => file_ref.base_offset += middle_page_offset * PAGE_SIZE, + Provider::Allocated { + cow_file_ref: None, .. + } + | Provider::AllocatedShared { .. } + | Provider::External { .. } => (), + } + + let after_grant = after_span.map(|span| Grant { + base: span.base, + info: GrantInfo { + flags: self.info.flags, + mapped: self.info.mapped, + page_count: span.count, + provider: match self.info.provider { + Provider::Allocated { + cow_file_ref: None, .. + } => Provider::Allocated { + cow_file_ref: None, + phys_contiguous: false, + }, + Provider::AllocatedShared { .. } => Provider::AllocatedShared { + is_pinned_userscheme_borrow: false, + }, + Provider::Allocated { + cow_file_ref: Some(ref file_ref), + .. + } => Provider::Allocated { + cow_file_ref: Some(GrantFileRef { + base_offset: file_ref.base_offset + this_span.count * PAGE_SIZE, + description: Arc::clone(&file_ref.description), + }), + phys_contiguous: false, + }, + Provider::External { + ref address_space, + src_base, + .. + } => Provider::External { + address_space: Arc::clone(address_space), + src_base, + is_pinned_userscheme_borrow: false, + }, + + Provider::PhysBorrowed { base } => Provider::PhysBorrowed { + base: base.next_by(this_span.count), + }, + Provider::FmapBorrowed { ref file_ref, .. } => Provider::FmapBorrowed { + file_ref: GrantFileRef { + base_offset: file_ref.base_offset + this_span.count * PAGE_SIZE, + description: Arc::clone(&file_ref.description), + }, + pin_refcount: 0, + }, + }, + }, + }); - let mut flush_all = MapperFlushAll::new(); + self.base = this_span.base; + self.info.page_count = this_span.count; - for page in self.pages() { - let result = active_table.map(page, self.flags); - flush_all.consume(result); + Some((before_grant, self, after_grant)) + } +} +impl GrantInfo { + pub fn is_pinned(&self) -> bool { + matches!( + self.provider, + Provider::External { + is_pinned_userscheme_borrow: true, + .. 
+ } | Provider::AllocatedShared { + is_pinned_userscheme_borrow: true, + .. + } | Provider::FmapBorrowed { + pin_refcount: 1.., + .. + } + ) + } + pub fn can_extract(&self, unpin: bool) -> bool { + !(self.is_pinned() && !unpin) + | matches!( + self.provider, + Provider::Allocated { + phys_contiguous: true, + .. + } + ) + } + pub fn unpin(&mut self) { + if let Provider::External { + ref mut is_pinned_userscheme_borrow, + .. } + | Provider::AllocatedShared { + ref mut is_pinned_userscheme_borrow, + .. + } = self.provider + { + *is_pinned_userscheme_borrow = false; + } + } - flush_all.flush(&mut active_table); + pub fn flags(&self) -> PageFlags { + self.flags + } + pub fn page_count(&self) -> usize { + self.page_count + } + pub fn can_have_flags(&self, flags: MapFlags) -> bool { + // TODO: read (some architectures support execute-only pages) + let is_downgrade = (self.flags.has_write() || !flags.contains(MapFlags::PROT_WRITE)) + && (self.flags.has_execute() || !flags.contains(MapFlags::PROT_EXEC)); + + match self.provider { + Provider::Allocated { .. } => true, + _ => is_downgrade, + } + } - if clear { - assert!(self.flags.contains(EntryFlags::WRITABLE)); - unsafe { - intrinsics::write_bytes(self.start_address().get() as *mut u8, 0, self.size); + pub fn can_be_merged_if_adjacent(&self, with: &Self) -> bool { + if self.mapped != with.mapped || self.flags.data() != with.flags.data() { + return false; + } + + match (&self.provider, &with.provider) { + ( + Provider::Allocated { + cow_file_ref: None, + phys_contiguous: false, + }, + Provider::Allocated { + cow_file_ref: None, + phys_contiguous: false, + }, + ) => true, + //(Provider::PhysBorrowed { base: ref lhs }, Provider::PhysBorrowed { base: ref rhs }) => lhs.next_by(self.page_count) == rhs.clone(), + //(Provider::External { address_space: ref lhs_space, src_base: ref lhs_base, cow: lhs_cow, .. }, Provider::External { address_space: ref rhs_space, src_base: ref rhs_base, cow: rhs_cow, .. }) => Arc::ptr_eq(lhs_space, rhs_space) && lhs_cow == rhs_cow && lhs_base.next_by(self.page_count) == rhs_base.clone(), + _ => false, + } + } + pub fn grant_flags(&self) -> GrantFlags { + let mut flags = GrantFlags::empty(); + // TODO: has_read + flags.set(GrantFlags::GRANT_READ, true); + + flags.set(GrantFlags::GRANT_WRITE, self.flags.has_write()); + flags.set(GrantFlags::GRANT_EXEC, self.flags.has_execute()); + + // TODO: Set GRANT_LAZY + + match self.provider { + Provider::External { + is_pinned_userscheme_borrow, + .. + } => { + flags.set(GrantFlags::GRANT_PINNED, is_pinned_userscheme_borrow); + flags |= GrantFlags::GRANT_SHARED; } + Provider::Allocated { + ref cow_file_ref, + phys_contiguous, + } => { + // !GRANT_SHARED is equivalent to "GRANT_PRIVATE" + flags.set(GrantFlags::GRANT_SCHEME, cow_file_ref.is_some()); + flags.set(GrantFlags::GRANT_PHYS_CONTIGUOUS, phys_contiguous); + } + Provider::AllocatedShared { + is_pinned_userscheme_borrow, + } => { + flags |= GrantFlags::GRANT_SHARED; + flags.set(GrantFlags::GRANT_PINNED, is_pinned_userscheme_borrow); + } + Provider::PhysBorrowed { .. } => { + flags |= GrantFlags::GRANT_SHARED | GrantFlags::GRANT_PHYS; + } + Provider::FmapBorrowed { .. } => { + flags |= GrantFlags::GRANT_SHARED | GrantFlags::GRANT_SCHEME; + } + } + + flags + } + pub fn file_ref(&self) -> Option<&GrantFileRef> { + if let Provider::FmapBorrowed { ref file_ref, .. } + | Provider::Allocated { + cow_file_ref: Some(ref file_ref), + .. 
+ } = self.provider + { + Some(file_ref) + } else { + None } } +} + +impl Drop for GrantInfo { + #[track_caller] + fn drop(&mut self) { + // XXX: This will not show the address... + assert!( + !self.mapped, + "Grant dropped while still mapped: {:#x?}", + self + ); + } +} - fn unmap(&mut self) { - let mut active_table = unsafe { ActivePageTable::new() }; +pub const DANGLING: usize = 1 << (usize::BITS - 2); - let mut flush_all = MapperFlushAll::new(); +#[derive(Debug)] +pub struct Table { + pub utable: PageMapper, +} - for page in self.pages() { - let result = active_table.unmap(page); - flush_all.consume(result); +impl Drop for AddrSpace { + fn drop(&mut self) { + for mut grant in core::mem::take(&mut self.grants).into_iter() { + // Unpinning the grant is allowed, because pinning only occurs in UserScheme calls to + // prevent unmapping the mapped range twice (which would corrupt only the scheme + // provider), but it won't be able to double free any range after this address space + // has been dropped! + grant.info.unpin(); + + // TODO: Optimize away clearing the actual page tables? Since this address space is no + // longer arc-rwlock wrapped, it cannot be referenced `External`ly by borrowing grants, + // so it should suffice to iterate over PageInfos and decrement and maybe deallocate + // the underlying pages (and send some funmaps). + let res = grant.unmap(&mut self.table.utable, &mut NopFlusher); + + let _ = res.unmap(); } + } +} - flush_all.flush(&mut active_table); +impl Drop for Table { + fn drop(&mut self) { + if self.utable.is_current() { + // TODO: Do not flush (we immediately context switch after exit(), what else is there + // to do?). Instead, we can garbage-collect such page tables in the idle kernel context + // before it waits for interrupts. Or maybe not, depends on what future benchmarks will + // indicate. + unsafe { + RmmA::set_table(TableKind::User, super::empty_cr3()); + } + } + unsafe { + deallocate_frame(Frame::containing(self.utable.table().phys())); + } } +} - /// A complicated operation to move a piece of memory to a new page table - /// It also allows for changing the address at the same time - pub fn move_to(&mut self, new_start: VirtualAddress, new_table: &mut InactivePageTable, temporary_page: &mut TemporaryPage) { - let mut active_table = unsafe { ActivePageTable::new() }; +#[derive(Clone, Copy, PartialEq)] +pub enum AccessMode { + Read, + Write, + InstrFetch, +} - let mut flush_all = MapperFlushAll::new(); +#[derive(Debug)] +pub enum PfError { + Segv, + Oom, + NonfatalInternalError, + // TODO: Handle recursion limit by mapping a zeroed page? Or forbid borrowing borrowed memory, + // and ensure pages are mapped at grant time? + RecursionLimitExceeded, +} - for page in self.pages() { - let (result, frame) = active_table.unmap_return(page, false); - flush_all.consume(result); +pub struct CowResult { + /// New frame, which has been given an exclusive reference the caller can use. + pub new_frame: Frame, - active_table.with(new_table, temporary_page, |mapper| { - let new_page = Page::containing_address(VirtualAddress::new(page.start_address().get() - self.start.get() + new_start.get())); - let result = mapper.map_to(new_page, frame, self.flags); - // This is not the active table, so the flush can be ignored - unsafe { result.ignore(); } - }); + /// Old frame. The caller must decrease its refcount if present, after it has shot down the TLB + /// of other CPUs properly. 
+ pub old_frame: Option, +} + +/// Consumes an existing reference to old_frame, and then returns an exclusive frame, with refcount +/// either preinitialized to One or Shared(2) depending on initial_ref_kind. This may be the same +/// frame, or (if the refcount is modified simultaneously) a new frame whereas the old frame is +/// deallocated. +fn cow( + old_frame: Frame, + old_info: &PageInfo, + initial_ref_kind: RefKind, +) -> Result { + let old_refcount = old_info.refcount(); + assert!(old_refcount.is_some()); + + let initial_rc = match initial_ref_kind { + RefKind::Cow => RefCount::One, + RefKind::Shared => RefCount::Shared(NonZeroUsize::new(2).unwrap()), + }; + + if old_refcount == Some(RefCount::One) { + // We were lucky; the frame was already exclusively owned, so the refcount cannot be + // modified unless we modify it. This is the special case where the old_frame returned is + // None. + + if initial_ref_kind == RefKind::Shared { + old_info + .refcount + .store(initial_rc.to_raw(), Ordering::Relaxed); } + return Ok(CowResult { + new_frame: old_frame, + old_frame: None, + }); + } - flush_all.flush(&mut active_table); + let new_frame = init_frame(initial_rc)?; - self.start = new_start; + if old_frame != the_zeroed_frame().0 { + unsafe { + copy_frame_to_frame_directly(new_frame, old_frame); + } } - pub fn remap(&mut self, new_flags: EntryFlags) { - let mut active_table = unsafe { ActivePageTable::new() }; + Ok(CowResult { + new_frame, + old_frame: Some(old_frame), + }) +} - let mut flush_all = MapperFlushAll::new(); +fn map_zeroed( + mapper: &mut PageMapper, + page: Page, + page_flags: PageFlags, + _writable: bool, +) -> Result { + let new_frame = init_frame(RefCount::One)?; + + unsafe { + mapper + .map_phys(page.start_address(), new_frame.base(), page_flags) + .ok_or(PfError::Oom)? + .ignore(); + } - for page in self.pages() { - let result = active_table.remap(page, new_flags); - flush_all.consume(result); - } + Ok(new_frame) +} - flush_all.flush(&mut active_table); +pub unsafe fn copy_frame_to_frame_directly(dst: Frame, src: Frame) { + // Optimized exact-page-size copy function? - self.flags = new_flags; + // TODO: For new frames, when the kernel's linear phys=>virt mappings are 4k, this is almost + // guaranteed to cause either one (or two) TLB misses. 
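// --- Illustrative sketch (editorial addition, not part of this patch) ---
// The cow() helper above boils down to one decision: if the caller is the sole
// owner of the frame, reuse it in place; otherwise allocate a fresh frame, copy
// the contents, and hand the old frame back so the caller can release its
// reference after the TLB shootdown. A simplified standalone model (frames are
// plain u64 here, and `alloc`/`copy` stand in for init_frame and the direct
// frame copy):
fn cow_sketch(
    old_frame: u64,
    exclusively_owned: bool,
    alloc: impl FnOnce() -> u64,
    copy: impl FnOnce(u64, u64),
) -> (u64, Option<u64>) {
    if exclusively_owned {
        // Nobody else can observe this frame, so it can keep being used
        // directly (possibly after adjusting its refcount kind).
        (old_frame, None)
    } else {
        let new_frame = alloc();
        copy(new_frame, old_frame); // copy_frame_to_frame_directly in the real code
        // The caller must drop one reference to old_frame once stale TLB
        // entries on other CPUs have been shot down.
        (new_frame, Some(old_frame))
    }
}
// --- end of sketch ---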
+ + let dst = unsafe { RmmA::phys_to_virt(dst.base()).data() as *mut u8 }; + let src = unsafe { RmmA::phys_to_virt(src.base()).data() as *const u8 }; + + unsafe { + dst.copy_from_nonoverlapping(src, PAGE_SIZE); } +} - pub fn resize(&mut self, new_size: usize, clear: bool) { - let mut active_table = unsafe { ActivePageTable::new() }; +pub fn try_correcting_page_tables(faulting_page: Page, access: AccessMode) -> Result<(), PfError> { + let Ok(addr_space_lock) = AddrSpace::current() else { + log::debug!("User page fault without address space being set."); + return Err(PfError::Segv); + }; - //TODO: Calculate page changes to minimize operations - if new_size > self.size { - let mut flush_all = MapperFlushAll::new(); + let lock = &addr_space_lock; + let (_, flush, _) = correct_inner(lock, lock.acquire_write(), faulting_page, access, 0)?; - let start_page = Page::containing_address(VirtualAddress::new(self.start.get() + self.size)); - let end_page = Page::containing_address(VirtualAddress::new(self.start.get() + new_size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - if active_table.translate_page(page).is_none() { - let result = active_table.map(page, self.flags); - flush_all.consume(result); + flush.flush(); + + Ok(()) +} +fn correct_inner<'l>( + addr_space_lock: &'l Arc, + mut addr_space_guard: RwLockWriteGuard<'l, AddrSpace>, + faulting_page: Page, + access: AccessMode, + recursion_level: u32, +) -> Result<(Frame, PageFlush, RwLockWriteGuard<'l, AddrSpace>), PfError> { + let mut addr_space = &mut *addr_space_guard; + let mut flusher = Flusher::with_cpu_set(&mut addr_space.used_by, &addr_space_lock.tlb_ack); + + let Some((grant_base, grant_info)) = addr_space.grants.contains(faulting_page) else { + log::debug!("Lacks grant"); + return Err(PfError::Segv); + }; + + let pages_from_grant_start = faulting_page.offset_from(grant_base); + + let grant_flags = grant_info.flags(); + match access { + // TODO: has_read + AccessMode::Read => (), + + AccessMode::Write if !grant_flags.has_write() => { + log::debug!("Write, but grant was not PROT_WRITE."); + return Err(PfError::Segv); + } + AccessMode::InstrFetch if !grant_flags.has_execute() => { + log::debug!("Instuction fetch, but grant was not PROT_EXEC."); + return Err(PfError::Segv); + } + + _ => (), + } + + // By now, the memory at the faulting page is actually valid, but simply not yet mapped, either + // at all, or with the required flags. + + let faulting_frame_opt = addr_space + .table + .utable + .translate(faulting_page.start_address()) + .map(|(phys, _page_flags)| Frame::containing(phys)); + let faulting_pageinfo_opt = faulting_frame_opt.map(|frame| (frame, get_page_info(frame))); + + // TODO: Aligned readahead? AMD Zen3+ CPUs can smash 4 4k pages that are 16k-aligned, into a + // single TLB entry, thus emulating 16k pages albeit with higher page table overhead. With the + // correct madvise information, allocating 4 contiguous pages and mapping them together, might + // be a useful future optimization. + // + // TODO: Readahead backwards, i.e. MAP_GROWSDOWN. + + let mut allow_writable = true; + + let frame = match grant_info.provider { + Provider::Allocated { .. } | Provider::AllocatedShared { .. 
} + if access == AccessMode::Write => + { + match faulting_pageinfo_opt { + Some((_, None)) => unreachable!("allocated page needs frame to be valid"), + Some((frame, Some(info))) => { + if info.allows_writable() { + frame + } else { + let result = cow(frame, info, RefKind::Cow)?; + if let Some(old_frame) = result.old_frame { + flusher.queue(old_frame, None, TlbShootdownActions::FREE); + } + result.new_frame + } } + _ => map_zeroed( + &mut addr_space.table.utable, + faulting_page, + grant_flags, + true, + )?, } + } - flush_all.flush(&mut active_table); + Provider::Allocated { .. } | Provider::AllocatedShared { .. } => { + match faulting_pageinfo_opt { + Some((_, None)) => unreachable!("allocated page needs frame to be valid"), - if clear { - unsafe { - intrinsics::write_bytes((self.start.get() + self.size) as *mut u8, 0, new_size - self.size); + // TODO: Can this match arm even be reached? In other words, can the TLB cache + // remember that pages are not present? + Some((frame, Some(page_info))) => { + // Keep in mind that allow_writable must always be true if this code is reached + // for AllocatedShared, since shared pages cannot be mapped lazily (without + // using AddrSpace backrefs). + allow_writable = page_info.allows_writable(); + + frame } - } - } else if new_size < self.size { - let mut flush_all = MapperFlushAll::new(); - let start_page = Page::containing_address(VirtualAddress::new(self.start.get() + new_size)); - let end_page = Page::containing_address(VirtualAddress::new(self.start.get() + self.size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - if active_table.translate_page(page).is_some() { - let result = active_table.unmap(page); - flush_all.consume(result); + None => { + // TODO: the zeroed page first, readonly? + map_zeroed( + &mut addr_space.table.utable, + faulting_page, + grant_flags, + false, + )? } } + } + Provider::PhysBorrowed { base } => base.next_by(pages_from_grant_start), + Provider::External { + address_space: ref foreign_address_space, + src_base, + .. + } => { + let foreign_address_space = Arc::clone(foreign_address_space); + + if Arc::ptr_eq(addr_space_lock, &foreign_address_space) { + return Err(PfError::NonfatalInternalError); + } - flush_all.flush(&mut active_table); + let mut guard = foreign_address_space.acquire_upgradeable_read(); + let src_page = src_base.next_by(pages_from_grant_start); + + if let Some(_) = guard.grants.contains(src_page) { + let src_frame = if let Some((phys, _)) = + guard.table.utable.translate(src_page.start_address()) + { + Frame::containing(phys) + } else { + // Grant was valid (TODO check), but we need to correct the underlying page. + // TODO: Access mode + + // TODO: Reasonable maximum? + let new_recursion_level = recursion_level + .checked_add(1) + .filter(|new_lvl| *new_lvl < 16) + .ok_or(PfError::RecursionLimitExceeded)?; + + drop(guard); + drop(flusher); + drop(addr_space_guard); + + // FIXME: Can this result in invalid address space state? + let ext_addrspace = &foreign_address_space; + let (frame, _, _) = { + let g = ext_addrspace.acquire_write(); + correct_inner( + ext_addrspace, + g, + src_page, + AccessMode::Read, + new_recursion_level, + )? 
+ }; + + addr_space_guard = addr_space_lock.acquire_write(); + addr_space = &mut *addr_space_guard; + flusher = + Flusher::with_cpu_set(&mut addr_space.used_by, &addr_space_lock.tlb_ack); + guard = foreign_address_space.acquire_upgradeable_read(); + + frame + }; + + let info = get_page_info(src_frame).expect("all allocated frames need a PageInfo"); + + match info.add_ref(RefKind::Shared) { + Ok(()) => src_frame, + Err(AddRefError::CowToShared) => { + let CowResult { + new_frame, + old_frame, + } = cow(src_frame, info, RefKind::Shared)?; + + if let Some(old_frame) = old_frame { + flusher.queue(old_frame, None, TlbShootdownActions::FREE); + flusher.flush(); + } + + let mut guard = RwLockUpgradableGuard::upgrade(guard); + + // TODO: flusher + unsafe { + guard + .table + .utable + .remap_with_full(src_page.start_address(), |_, f| { + (new_frame.base(), f) + }); + } + + new_frame + } + Err(AddRefError::SharedToCow) => unreachable!(), + Err(AddRefError::RcOverflow) => return Err(PfError::Oom), + } + } else { + // Grant did not exist, but we did own a Provider::External mapping, and cannot + // simply let the current context fail. TODO: But all borrowed memory shouldn't + // really be lazy though? TODO: Should a grant be created? + + let mut guard = RwLockUpgradableGuard::upgrade(guard); + + // TODO: Should this be called? + log::warn!("Mapped zero page since grant didn't exist"); + map_zeroed( + &mut guard.table.utable, + src_page, + grant_flags, + access == AccessMode::Write, + )? + } + } + // TODO: NonfatalInternalError if !MAP_LAZY and this page fault occurs. + Provider::FmapBorrowed { ref file_ref, .. } => { + let file_ref = file_ref.clone(); + let flags = map_flags(grant_info.flags()); + drop(flusher); + drop(addr_space_guard); + + let (scheme_id, scheme_number) = match file_ref.description.read() { + ref desc => (desc.scheme, desc.number), + }; + let user_inner = scheme::schemes() + .get(scheme_id) + .and_then(|s| { + if let KernelSchemes::User(user) = s { + user.inner.upgrade() + } else { + None + } + }) + .ok_or(PfError::Segv)?; + + let offset = file_ref.base_offset as u64 + (pages_from_grant_start * PAGE_SIZE) as u64; + user_inner + .request_fmap(scheme_number, offset, 1, flags) + .unwrap(); + + let context_lock = crate::context::current(); + context_lock + .write() + .hard_block(HardBlockedReason::AwaitingMmap { file_ref }); + + super::switch(); + + let frame = context_lock + .write() + .fmap_ret + .take() + .ok_or(PfError::NonfatalInternalError)?; + + addr_space_guard = addr_space_lock.acquire_write(); + addr_space = &mut *addr_space_guard; + flusher = Flusher::with_cpu_set(&mut addr_space.used_by, &addr_space_lock.tlb_ack); + + log::info!("Got frame {:?} from external fmap", frame); + + frame } + }; + + let new_flags = grant_flags.write(grant_flags.has_write() && allow_writable); + let Some(flush) = (unsafe { + addr_space + .table + .utable + .map_phys(faulting_page.start_address(), frame.base(), new_flags) + }) else { + // TODO + return Err(PfError::Oom); + }; + + drop(flusher); + Ok((frame, flush, addr_space_guard)) +} + +#[derive(Debug)] +pub enum MmapMode { + Cow, + Shared, +} - self.size = new_size; +pub struct BorrowedFmapSource<'a> { + pub src_base: Page, + pub mode: MmapMode, + // TODO: There should be a method that obtains the lock from the guard. 
+ pub addr_space_lock: &'a Arc, + pub addr_space_guard: RwLockWriteGuard<'a, AddrSpace>, +} + +pub fn handle_notify_files(notify_files: Vec) { + for file in notify_files { + let _ = file.unmap(); } } -impl Drop for Memory { - fn drop(&mut self) { - self.unmap(); +pub enum CopyMappingsMode { + Owned { cow_file_ref: Option }, + Borrowed, +} + +// TODO: Check if polymorphism is worth it in terms of code size performance penalty vs optimized +// away checks. +pub trait GenericFlusher { + // TODO: Don't require a frame unless FREE, require Page otherwise + fn queue( + &mut self, + frame: Frame, + phys_contiguous_count: Option, + actions: TlbShootdownActions, + ); +} +pub struct NopFlusher; +impl GenericFlusher for NopFlusher { + fn queue( + &mut self, + frame: Frame, + phys_contiguous_count: Option, + actions: TlbShootdownActions, + ) { + if actions.contains(TlbShootdownActions::FREE) { + handle_free_action(frame, phys_contiguous_count); + } + } +} +fn handle_free_action(base: Frame, phys_contiguous_count: Option) { + if let Some(count) = phys_contiguous_count { + for i in 0..count.get() { + let new_rc = get_page_info(base.next_by(i)) + .expect("phys_contiguous frames all need PageInfos") + .remove_ref(); + + assert_eq!(new_rc, None); + } + unsafe { + let order = count.get().next_power_of_two().trailing_zeros(); + deallocate_p2frame(base, order); + } + } else { + let Some(info) = get_page_info(base) else { + return; + }; + if info.remove_ref() == None { + unsafe { + deallocate_frame(base); + } + } } } +struct FlusherState<'addrsp> { + // TODO: what capacity? + pagequeue: ArrayVec, + dirty: bool, -#[derive(Debug)] -pub struct Tls { - pub master: VirtualAddress, - pub file_size: usize, - pub mem: Memory, - pub offset: usize, -} - -impl Tls { - /// Load TLS data from master - pub unsafe fn load(&mut self) { - intrinsics::copy( - self.master.get() as *const u8, - (self.mem.start_address().get() + self.offset) as *mut u8, - self.file_size + ackword: &'addrsp AtomicU32, +} + +enum PageQueueEntry { + Free { + base: Frame, + phys_contiguous_count: Option, + }, + Other { + actions: TlbShootdownActions, + //page: Page, + }, +} + +pub struct Flusher<'guard, 'addrsp> { + active_cpus: &'guard mut LogicalCpuSet, + state: FlusherState<'addrsp>, +} +impl<'guard, 'addrsp> Flusher<'guard, 'addrsp> { + fn with_cpu_set(set: &'guard mut LogicalCpuSet, ackword: &'addrsp AtomicU32) -> Self { + Self { + active_cpus: set, + state: FlusherState { + pagequeue: ArrayVec::new(), + dirty: false, + ackword, + }, + } + } + fn detach(mut self) -> FlusherState<'addrsp> { + static DUMMY: AtomicU32 = AtomicU32::new(0); + let state = core::mem::replace( + &mut self.state, + FlusherState { + pagequeue: ArrayVec::new(), + ackword: &DUMMY, + dirty: false, + }, ); + core::mem::forget(self); + state + } + // NOTE: Lock must be held, which must be guaranteed by the caller. 
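// --- Illustrative sketch (editorial addition, not part of this patch) ---
// queue()/flush() below implement deferred, batched TLB shootdowns: entries
// accumulate in a fixed-size buffer, and only when the buffer fills up, or the
// flusher is dropped, are the other CPUs interrupted and the queued frames
// finally freed. A minimal standalone model of that batching (Vec instead of
// ArrayVec, u64 frame numbers, no real IPIs):
struct BatchedFlusher {
    queued_frames: Vec<u64>,
    capacity: usize,
}

impl BatchedFlusher {
    fn queue(&mut self, frame: u64) {
        if self.queued_frames.len() == self.capacity {
            self.flush();
        }
        self.queued_frames.push(frame);
    }

    fn flush(&mut self) {
        if self.queued_frames.is_empty() {
            return;
        }
        // 1. Notify every other CPU using this address space and wait for all
        //    of them to acknowledge (shootdown IPIs + ack word).
        // 2. Only then is it safe to free the frames, since no stale TLB entry
        //    can still reach them.
        for _frame in self.queued_frames.drain(..) {
            // deallocate_frame(_frame) would happen here.
        }
    }
}

impl Drop for BatchedFlusher {
    fn drop(&mut self) {
        // Mirrors `impl Drop for Flusher`: nothing queued may be lost.
        self.flush();
    }
}
// --- end of sketch ---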
+ pub fn flush(&mut self) { + let pages = core::mem::take(&mut self.state.pagequeue); + + if pages.is_empty() && core::mem::replace(&mut self.state.dirty, false) == false { + return; + } + + self.state.ackword.store(0, Ordering::SeqCst); + + let mut affected_cpu_count = 0; + + let current_cpu_id = crate::cpu_id(); + + for cpu_id in self.active_cpus.iter_mut() { + if cpu_id == current_cpu_id { + continue; + } + + crate::percpu::shootdown_tlb_ipi(Some(cpu_id)); + affected_cpu_count += 1; + } + + if self.active_cpus.contains(current_cpu_id) { + rmm::PageFlushAll::::new().flush(); + } + + while self.state.ackword.load(Ordering::SeqCst) < affected_cpu_count { + PercpuBlock::current().maybe_handle_tlb_shootdown(); + core::hint::spin_loop(); + } + + for entry in pages { + let PageQueueEntry::Free { + base, + phys_contiguous_count, + } = entry + else { + continue; + }; + handle_free_action(base, phys_contiguous_count); + } + } +} +impl GenericFlusher for Flusher<'_, '_> { + fn queue( + &mut self, + frame: Frame, + phys_contiguous_count: Option, + actions: TlbShootdownActions, + ) { + let actions = actions & !TlbShootdownActions::NEW_MAPPING; + + let entry = if actions.contains(TlbShootdownActions::FREE) { + PageQueueEntry::Free { + base: frame, + phys_contiguous_count, + } + } else { + PageQueueEntry::Other { actions } + }; + self.state.dirty = true; + + if self.state.pagequeue.is_full() { + self.flush(); + } + self.state.pagequeue.push(entry); + } +} +impl Drop for Flusher<'_, '_> { + fn drop(&mut self) { + self.flush(); + } +} +bitflags::bitflags! { + pub struct TlbShootdownActions: usize { + // Delay the deallocation of one or more contiguous frames. + const FREE = 1; + + // Revoke various access flags from a page + const REVOKE_READ = 1 << 1; + const REVOKE_WRITE = 1 << 2; + const REVOKE_EXEC = 1 << 3; + + // Unmap a page from one address space without deallocating it. + const MOVE = 1 << 4; + + // Add a new mapping to an address space. + // Not really a TLB shootdown action on most architectures, so almost always a no-op. + const NEW_MAPPING = 1 << 31; + } +} +impl TlbShootdownActions { + pub fn change_of_flags(old: PageFlags, new: PageFlags) -> Self { + let mut this = Self::empty(); + this.set(Self::REVOKE_WRITE, old.has_write() && !new.has_write()); + this.set(Self::REVOKE_EXEC, old.has_execute() && !new.has_execute()); + this } } diff --git a/src/context/mod.rs b/src/context/mod.rs index b7024334..e29d254e 100644 --- a/src/context/mod.rs +++ b/src/context/mod.rs @@ -1,26 +1,50 @@ //! # Context management //! //! 
For resources on contexts, please consult [wikipedia](https://en.wikipedia.org/wiki/Context_switch) and [osdev](https://wiki.osdev.org/Context_Switching) -use alloc::boxed::Box; -use core::alloc::{GlobalAlloc, Layout}; -use core::sync::atomic::Ordering; -use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; -pub use self::context::{Context, ContextId, Status, WaitpidKey}; -pub use self::list::ContextList; -pub use self::switch::switch; +use core::num::NonZeroUsize; +use alloc::{borrow::Cow, collections::BTreeSet, sync::Arc}; + +use spin::{RwLock, RwLockReadGuard, RwLockWriteGuard}; +use spinning_top::RwSpinlock; +use syscall::ENOMEM; + +use crate::{ + context::memory::AddrSpaceWrapper, + cpu_set::LogicalCpuSet, + paging::{RmmA, RmmArch, TableKind}, + percpu::PercpuBlock, + syscall::error::{Error, Result}, +}; + +use self::context::Kstack; +pub use self::{ + context::{BorrowedHtBuf, Context, Status}, + switch::switch, +}; + +#[cfg(target_arch = "aarch64")] +#[path = "arch/aarch64.rs"] +mod arch; + +#[cfg(target_arch = "x86")] +#[path = "arch/x86.rs"] +mod arch; + +#[cfg(target_arch = "x86_64")] #[path = "arch/x86_64.rs"] mod arch; -/// Context struct -mod context; +#[cfg(target_arch = "riscv64")] +#[path = "arch/riscv64.rs"] +mod arch; -/// Context list -mod list; +/// Context struct +pub mod context; /// Context switch function -mod switch; +pub mod switch; /// File struct - defines a scheme and a file number pub mod file; @@ -34,52 +58,121 @@ pub mod signal; /// Timeout handling pub mod timeout; -/// Limit on number of contexts -pub const CONTEXT_MAX_CONTEXTS: usize = (isize::max_value() as usize) - 1; +pub use self::switch::switch_finish_hook; /// Maximum context files pub const CONTEXT_MAX_FILES: usize = 65_536; -/// Contexts list -static CONTEXTS: Once> = Once::new(); +pub use self::arch::empty_cr3; -#[thread_local] -static CONTEXT_ID: context::AtomicContextId = context::AtomicContextId::default(); +// Set of weak references to all contexts available for scheduling. The only strong references are +// the context file descriptors. 
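// --- Illustrative sketch (editorial addition, not part of this patch) ---
// CONTEXTS below is a BTreeSet, so its element type needs a total order. The
// ContextRef wrapper further down derives that order from the address of the
// Arc allocation itself, which stays stable for the context's lifetime and
// needs no separate numeric ID. A standalone model of the same trick (std used
// here for brevity; the kernel is no_std):
use std::{collections::BTreeSet, sync::Arc};

struct ByPtr(Arc<String>);

impl Ord for ByPtr {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        Ord::cmp(&Arc::as_ptr(&self.0), &Arc::as_ptr(&other.0))
    }
}
impl PartialOrd for ByPtr {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl PartialEq for ByPtr {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other).is_eq()
    }
}
impl Eq for ByPtr {}

fn demo() {
    let mut set = BTreeSet::new();
    let a = Arc::new(String::from("context a"));
    set.insert(ByPtr(Arc::clone(&a)));
    set.insert(ByPtr(Arc::new(String::from("context b"))));
    // Membership is identity-based: the same allocation is found again.
    assert!(set.contains(&ByPtr(a)));
}
// --- end of sketch ---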
+static CONTEXTS: RwLock> = RwLock::new(BTreeSet::new()); pub fn init() { - let mut contexts = contexts_mut(); - let context_lock = contexts.new_context().expect("could not initialize first context"); - let mut context = context_lock.write(); - let mut fx = unsafe { Box::from_raw(crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(512, 16)) as *mut [u8; 512]) }; - for b in fx.iter_mut() { - *b = 0; - } + let owner = None; // kmain not owned by any fd + let mut context = Context::new(owner).expect("failed to create kmain context"); + context.sched_affinity = LogicalCpuSet::empty(); + context.sched_affinity.atomic_set(crate::cpu_id()); + + context.name.clear(); + context.name.push_str("[kmain]"); + + self::arch::EMPTY_CR3.call_once(|| unsafe { RmmA::table(TableKind::User) }); - context.arch.set_fx(fx.as_ptr() as usize); - context.kfx = Some(fx); context.status = Status::Runnable; context.running = true; context.cpu_id = Some(crate::cpu_id()); - CONTEXT_ID.store(context.id, Ordering::SeqCst); -} -/// Initialize contexts, called if needed -fn init_contexts() -> RwLock { - RwLock::new(ContextList::new()) + let context_lock = Arc::new(RwSpinlock::new(context)); + + CONTEXTS + .write() + .insert(ContextRef(Arc::clone(&context_lock))); + + unsafe { + let percpu = PercpuBlock::current(); + percpu + .switch_internals + .set_current_context(Arc::clone(&context_lock)); + percpu.switch_internals.set_idle_context(context_lock); + } } /// Get the global schemes list, const -pub fn contexts() -> RwLockReadGuard<'static, ContextList> { - //call once will init_contexts only once during the kernel's exececution, otherwise it will return the current context via a - //cache. - CONTEXTS.call_once(init_contexts).read() +pub fn contexts() -> RwLockReadGuard<'static, BTreeSet> { + CONTEXTS.read() } /// Get the global schemes list, mutable -pub fn contexts_mut() -> RwLockWriteGuard<'static, ContextList> { - CONTEXTS.call_once(init_contexts).write() +pub fn contexts_mut() -> RwLockWriteGuard<'static, BTreeSet> { + CONTEXTS.write() +} + +pub fn current() -> Arc> { + PercpuBlock::current() + .switch_internals + .with_context(|context| Arc::clone(context)) +} +pub fn try_current() -> Option>> { + PercpuBlock::current() + .switch_internals + .try_with_context(|context| context.map(Arc::clone)) +} +pub fn is_current(context: &Arc>) -> bool { + PercpuBlock::current() + .switch_internals + .with_context(|current| Arc::ptr_eq(context, current)) +} + +pub struct ContextRef(pub Arc>); +impl ContextRef { + pub fn upgrade(&self) -> Option>> { + Some(Arc::clone(&self.0)) + } } -pub fn context_id() -> ContextId { - CONTEXT_ID.load(Ordering::SeqCst) +impl Ord for ContextRef { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + Ord::cmp(&Arc::as_ptr(&self.0), &Arc::as_ptr(&other.0)) + } +} +impl PartialOrd for ContextRef { + fn partial_cmp(&self, other: &Self) -> Option { + Some(Ord::cmp(self, other)) + } +} +impl PartialEq for ContextRef { + fn eq(&self, other: &Self) -> bool { + Ord::cmp(self, other) == core::cmp::Ordering::Equal + } +} +impl Eq for ContextRef {} + +/// Spawn a context from a function. 
+pub fn spawn( + userspace_allowed: bool, + owner_proc_id: Option, + func: extern "C" fn(), +) -> Result>> { + let stack = Kstack::new()?; + + let context_lock = Arc::try_new(RwSpinlock::new(Context::new(owner_proc_id)?)) + .map_err(|_| Error::new(ENOMEM))?; + + CONTEXTS + .write() + .insert(ContextRef(Arc::clone(&context_lock))); + + { + let mut context = context_lock.write(); + let _ = context.set_addr_space(Some(AddrSpaceWrapper::new()?)); + context + .arch + .setup_initial_call(&stack, func, userspace_allowed); + + context.kstack = Some(stack); + context.userspace = userspace_allowed; + } + Ok(context_lock) } diff --git a/src/context/signal.rs b/src/context/signal.rs index d5d2ff18..91c46462 100644 --- a/src/context/signal.rs +++ b/src/context/signal.rs @@ -1,118 +1,103 @@ -use alloc::sync::Arc; -use core::mem; - -use crate::context::{contexts, switch, Status, WaitpidKey}; -use crate::start::usermode; -use crate::{ptrace, syscall}; -use crate::syscall::flag::{PTRACE_EVENT_SIGNAL, PTRACE_SIGNAL, SIG_DFL, SIG_IGN, SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU}; -use crate::syscall::data::{PtraceEvent, PtraceEventData}; - -pub fn is_user_handled(handler: Option) -> bool { - let handler = handler.map(|ptr| ptr as usize).unwrap_or(0); - handler != SIG_DFL && handler != SIG_IGN -} +use core::sync::atomic::Ordering; -pub extern "C" fn signal_handler(sig: usize) { - let (action, restorer) = { - let contexts = contexts(); - let context_lock = contexts.current().expect("context::signal_handler not inside of context"); - let context = context_lock.read(); - let actions = context.actions.lock(); - actions[sig] - }; +use crate::{context, syscall::flag::SigcontrolFlags}; + +pub fn signal_handler() { + let context_lock = context::current(); + let mut context_guard = context_lock.write(); + let context = &mut *context_guard; - ptrace::send_event(PtraceEvent { - tag: PTRACE_EVENT_SIGNAL, - data: PtraceEventData { signal: sig } - }); - - let handler = action.sa_handler.map(|ptr| ptr as usize).unwrap_or(0); - if handler == SIG_DFL { - match sig { - SIGCHLD => { - // println!("SIGCHLD"); - }, - SIGCONT => { - // println!("Continue"); - - { - let contexts = contexts(); - - let (pid, pgid, ppid) = { - let context_lock = contexts.current().expect("context::signal_handler not inside of context"); - let mut context = context_lock.write(); - context.status = Status::Runnable; - (context.id, context.pgid, context.ppid) - }; - - if let Some(parent_lock) = contexts.get(ppid) { - let waitpid = { - let parent = parent_lock.write(); - Arc::clone(&parent.waitpid) - }; - - waitpid.send(WaitpidKey { - pid: Some(pid), - pgid: Some(pgid) - }, (pid, 0xFFFF)); - } else { - println!("{}: {} not found for continue", pid.into(), ppid.into()); - } - } - }, - SIGSTOP | SIGTSTP | SIGTTIN | SIGTTOU => { - // println!("Stop {}", sig); - - { - let contexts = contexts(); - - let (pid, pgid, ppid) = { - let context_lock = contexts.current().expect("context::signal_handler not inside of context"); - let mut context = context_lock.write(); - context.status = Status::Stopped(sig); - (context.id, context.pgid, context.ppid) - }; - - if let Some(parent_lock) = contexts.get(ppid) { - let waitpid = { - let parent = parent_lock.write(); - Arc::clone(&parent.waitpid) - }; - - waitpid.send(WaitpidKey { - pid: Some(pid), - pgid: Some(pgid) - }, (pid, (sig << 8) | 0x7F)); - } else { - println!("{}: {} not found for stop", pid.into(), ppid.into()); - } - } - - unsafe { switch() }; - }, - _ => { - // println!("Exit {}", sig); - 
syscall::exit(sig); - } - } - } else if handler == SIG_IGN { - // println!("Ignore"); - } else { - // println!("Call {:X}", handler); - - ptrace::breakpoint_callback(PTRACE_SIGNAL); - - unsafe { - let mut sp = crate::USER_SIGSTACK_OFFSET + crate::USER_SIGSTACK_SIZE - 256; - - sp = (sp / 16) * 16; - - sp -= mem::size_of::(); - *(sp as *mut usize) = restorer; - - usermode(handler, sp, sig); - } + let being_sigkilled = context.being_sigkilled; + + if being_sigkilled { + drop(context_guard); + drop(context_lock); + crate::syscall::process::exit_this_context(None); } - syscall::sigreturn().unwrap(); + /*let thumbs_down = ptrace::breakpoint_callback( + PTRACE_STOP_SIGNAL, + Some(ptrace_event!(PTRACE_STOP_SIGNAL)), + ) + .and_then(|_| ptrace::next_breakpoint().map(|f| f.contains(PTRACE_FLAG_IGNORE)));*/ + + // TODO: thumbs_down + let Some((thread_ctl, proc_ctl, st)) = context.sigcontrol() else { + // Discard signal if sigcontrol is unset. + log::trace!("no sigcontrol, returning"); + return; + }; + if thread_ctl.currently_pending_unblocked(proc_ctl) == 0 { + // The context is currently Runnable. When transitioning into Blocked, it will check for + // signals (with the context lock held, which is required when sending signals). After + // that, any detection of pending unblocked signals by the sender, will result in the + // context being unblocked, and signals sent. + + // TODO: prioritize signals over regular program execution + return; + } + let control_flags = + SigcontrolFlags::from_bits_retain(thread_ctl.control_flags.load(Ordering::Acquire)); + + if control_flags.contains(SigcontrolFlags::INHIBIT_DELIVERY) { + // Signals are inhibited to protect critical sections inside libc, but this code will run + // every time the context is switched to. + log::trace!("Inhibiting delivery, returning"); + return; + } + + let sigh_instr_ptr = st.user_handler.get(); + + let Some(regs) = context.regs_mut() else { + // TODO: is this even reachable? + log::trace!("No registers, returning"); + return; + }; + + let ip = regs.instr_pointer(); + let archdep_reg = regs.sig_archdep_reg(); + + regs.set_instr_pointer(sigh_instr_ptr); + + let (thread_ctl, _, _) = context + .sigcontrol() + .expect("cannot have been unset while holding the lock"); + + thread_ctl.saved_ip.set(ip); + thread_ctl.saved_archdep_reg.set(archdep_reg); + + thread_ctl.control_flags.store( + (control_flags | SigcontrolFlags::INHIBIT_DELIVERY).bits(), + Ordering::Release, + ); +} +pub fn excp_handler(excp: syscall::Exception) { + let current = context::current(); + + let mut context = current.write(); + + let Some(eh) = context.sig.as_ref().and_then(|s| s.excp_handler) else { + // TODO: Let procmgr print this? + log::info!( + "UNHANDLED EXCEPTION, CPU {}, PID {}, NAME {}, CONTEXT {current:p}", + crate::cpu_id(), + context.pid, + context.name + ); + drop(context); + // TODO: Allow exceptions to be caught by tracer etc, without necessarily exiting the + // context (closing files, dropping AddrSpace, etc) + crate::syscall::process::exit_this_context(Some(excp)); + }; + // TODO + /* + let Some(regs) = context.regs_mut() else { + // TODO: unhandled exception in this case too? 
+ return; + }; + let old_ip = regs.instr_pointer(); + let old_archdep_reg = regs.ar + let (tctl, pctl, sigst) = context.sigcontrol().expect("already checked"); + tctl.saved_ip.set(excp.rsp); + tctl.saved_archdep_reg*/ } diff --git a/src/context/switch.rs b/src/context/switch.rs index 2a5f6c51..d933501c 100644 --- a/src/context/switch.rs +++ b/src/context/switch.rs @@ -1,162 +1,396 @@ -use core::sync::atomic::Ordering; - -use crate::context::{arch, contexts, Context, Status, CONTEXT_ID}; -use crate::context::signal::signal_handler; -use crate::gdt; -use crate::interrupt; -use crate::interrupt::irq::PIT_TICKS; -use crate::time; - -unsafe fn update(context: &mut Context, cpu_id: usize) { - // Take ownership if not already owned - if context.cpu_id == None { - context.cpu_id = Some(cpu_id); - // println!("{}: take {} {}", cpu_id, context.id, ::core::str::from_utf8_unchecked(&context.name.lock())); - } +///! This module provides a context-switching mechanism that utilizes a simple round-robin scheduler. +///! The scheduler iterates over available contexts, selecting the next context to run, while +///! handling process states and synchronization. +use core::{ + cell::{Cell, RefCell}, + mem, + ops::Bound, + sync::atomic::Ordering, +}; - // Restore from signal, must only be done from another context to avoid overwriting the stack! - if context.ksig_restore && ! context.running { - let ksig = context.ksig.take().expect("context::switch: ksig not set with ksig_restore"); - context.arch = ksig.0; +use alloc::sync::Arc; +use spinning_top::{guard::ArcRwSpinlockWriteGuard, RwSpinlock}; +use syscall::PtraceFlags; - if let Some(ref mut kfx) = context.kfx { - kfx.clone_from_slice(&ksig.1.expect("context::switch: ksig kfx not set with ksig_restore")); - } else { - panic!("context::switch: kfx not set with ksig_restore"); - } +use crate::{ + context::{arch, contexts, Context}, + cpu_set::LogicalCpuId, + interrupt, + percpu::PercpuBlock, + ptrace, time, +}; - if let Some(ref mut kstack) = context.kstack { - kstack.clone_from_slice(&ksig.2.expect("context::switch: ksig kstack not set with ksig_restore")); - } else { - panic!("context::switch: kstack not set with ksig_restore"); - } +#[cfg(feature = "sys_stat")] +use crate::cpu_stats; - context.ksig_restore = false; +use super::ContextRef; - context.unblock(); +enum UpdateResult { + CanSwitch, + Skip, +} + +/// Determines if a given context is eligible to be scheduled on a given CPU (in +/// principle, the current CPU). +/// +/// # Safety +/// This function is unsafe because it modifies the `context`'s state directly without synchronization. +/// +/// # Parameters +/// - `context`: The context (process/thread) to be checked. +/// - `cpu_id`: The logical ID of the CPU on which the context is being scheduled. +/// +/// # Returns +/// - `UpdateResult::CanSwitch`: If the context can be switched to. +/// - `UpdateResult::Skip`: If the context should be skipped (e.g., it's running on another CPU). +unsafe fn update_runnable(context: &mut Context, cpu_id: LogicalCpuId) -> UpdateResult { + // Ignore contexts that are already running. + if context.running { + return UpdateResult::Skip; } - // Unblock when there are pending signals - if context.status == Status::Blocked && !context.pending.is_empty() { - context.unblock(); + // Ignore contexts assigned to other CPUs. 
+ if !context.sched_affinity.contains(cpu_id) { + return UpdateResult::Skip; } - // Wake from sleep - if context.status == Status::Blocked && context.wake.is_some() { - let wake = context.wake.expect("context::switch: wake not set"); + //TODO: HACK TO WORKAROUND HANGS BY PINNING TO ONE CPU + if !context.cpu_id.map_or(true, |x| x == cpu_id) { + return UpdateResult::Skip; + } - let current = time::monotonic(); - if current.0 > wake.0 || (current.0 == wake.0 && current.1 >= wake.1) { - context.wake = None; - context.unblock(); + // If context is soft-blocked and has a wake-up time, check if it should wake up. + if context.status.is_soft_blocked() { + if let Some(wake) = context.wake { + let current = time::monotonic(); + if current >= wake { + context.wake = None; + context.unblock_no_ipi(); + } } } + + // If the context is runnable, indicate it can be switched to. + if context.status.is_runnable() { + UpdateResult::CanSwitch + } else { + UpdateResult::Skip + } +} + +struct SwitchResultInner { + _prev_guard: ArcRwSpinlockWriteGuard, + _next_guard: ArcRwSpinlockWriteGuard, } -unsafe fn runnable(context: &Context, cpu_id: usize) -> bool { - // Switch to context if it needs to run, is not currently running, and is owned by the current CPU - !context.running && !context.ptrace_stop && context.status == Status::Runnable && context.cpu_id == Some(cpu_id) +/// Tick function to update PIT ticks and trigger a context switch if necessary. +/// +/// Called periodically, this function increments a per-CPU tick counter and performs a context +/// switch if the counter reaches a set threshold (e.g., every 3 ticks). +/// +/// The function also calls the signal handler after switching contexts. +pub fn tick() { + let ticks_cell = &PercpuBlock::current().switch_internals.pit_ticks; + + let new_ticks = ticks_cell.get() + 1; + ticks_cell.set(new_ticks); + + // Trigger a context switch after every 3 ticks (approx. 6.75 ms). + if new_ticks >= 3 { + switch(); + crate::context::signal::signal_handler(); + } } -/// Switch to the next context +/// Finishes the context switch by clearing any temporary data and resetting the lock. +/// +/// This function is called after a context switch is completed to perform cleanup, including +/// clearing the switch result data and releasing the context switch lock. /// /// # Safety +/// This function involves unsafe operations such as resetting state and releasing locks. +pub unsafe extern "C" fn switch_finish_hook() { + if let Some(switch_result) = PercpuBlock::current().switch_internals.switch_result.take() { + drop(switch_result); + } else { + // TODO: unreachable_unchecked()? + crate::arch::stop::emergency_reset(); + } + arch::CONTEXT_SWITCH_LOCK.store(false, Ordering::SeqCst); + crate::percpu::switch_arch_hook(); +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum SwitchResult { + Switched, + AllContextsIdle, +} + +/// Selects and switches to the next context using a round-robin scheduler. +/// +/// This function performs the context switch, checking each context in a loop for eligibility +/// until it finds a context ready to run. If no other context is runnable, it returns to the +/// idle context. +/// +/// # Warning +/// This is not memory-unsafe to call. But do NOT call this while holding locks! /// -/// Do not call this while holding locks! -pub unsafe fn switch() -> bool { - use core::ops::DerefMut; +/// # Returns +/// - `SwitchResult::Switched`: Indicates a successful switch to a new context. 
+/// - `SwitchResult::AllContextsIdle`: Indicates all contexts are idle, and the CPU will switch +/// to an idle context. +pub fn switch() -> SwitchResult { + let percpu = PercpuBlock::current(); + #[cfg(feature = "sys_stat")] + { + cpu_stats::add_context_switch(); + percpu + .stats + .add_time(percpu.switch_internals.pit_ticks.get()); + } //set PIT Interrupt counter to 0, giving each process same amount of PIT ticks - PIT_TICKS.store(0, Ordering::SeqCst); + percpu.switch_internals.pit_ticks.set(0); - // Set the global lock to avoid the unsafe operations below from causing issues - while arch::CONTEXT_SWITCH_LOCK.compare_and_swap(false, true, Ordering::SeqCst) { + // Acquire the global lock to ensure exclusive access during context switch and avoid + // issues that would be caused by the unsafe operations below + // TODO: Better memory orderings? + while arch::CONTEXT_SWITCH_LOCK + .compare_exchange_weak(false, true, Ordering::SeqCst, Ordering::Relaxed) + .is_err() + { interrupt::pause(); + percpu.maybe_handle_tlb_shootdown(); } let cpu_id = crate::cpu_id(); - let from_ptr; - let mut to_ptr = 0 as *mut Context; - let mut to_sig = None; + let mut switch_context_opt = None; { let contexts = contexts(); + + // Lock the previous context. + let prev_context_lock = crate::context::current(); + let prev_context_guard = prev_context_lock.write_arc(); + + let idle_context = percpu.switch_internals.idle_context(); + + // Stateful flag used to skip the idle process the first time it shows up. + // After that, this flag is set to `false` so the idle process can be + // picked up. + let mut skip_idle = true; + + // Attempt to locate the next context to switch to. + for next_context_lock in contexts + // Include all contexts with IDs greater than the current... + .range(( + Bound::Excluded(ContextRef(Arc::clone(&prev_context_lock))), + Bound::Unbounded, + )) + // ... and all contexts with IDs less than the current... + .chain(contexts.range(( + Bound::Unbounded, + Bound::Excluded(ContextRef(Arc::clone(&prev_context_lock))), + ))) + .filter_map(ContextRef::upgrade) + // ... and the idle context... + .chain(Some(Arc::clone(&idle_context))) + // ... but not the current context (note the `Bound::Excluded`), + // which is already locked. { - let context_lock = contexts - .current() - .expect("context::switch: not inside of context"); - let mut context = context_lock.write(); - from_ptr = context.deref_mut() as *mut Context; + if Arc::ptr_eq(&next_context_lock, &idle_context) && skip_idle { + // Skip idle process the first time it shows up, but allow it + // to be picked up again the next time. + skip_idle = false; + continue; + } + + // Lock next context + let mut next_context_guard = next_context_lock.write_arc(); + + // Check if the context is runnable and can be switched to. + if let UpdateResult::CanSwitch = + unsafe { update_runnable(&mut *next_context_guard, cpu_id) } + { + // Store locks for previous and next context and break out from loop + // for the switch + switch_context_opt = Some((prev_context_guard, next_context_guard)); + break; + } } + }; - for (_pid, context_lock) in contexts.iter() { - let mut context = context_lock.write(); - update(&mut context, cpu_id); + // Switch process states, TSS stack pointer, and store new context ID + if let Some((mut prev_context_guard, mut next_context_guard)) = switch_context_opt { + // Update context states and prepare for the switch. 
+ let prev_context = &mut *prev_context_guard; + let next_context = &mut *next_context_guard; + + // Set the previous context as "not running" + prev_context.running = false; + + // Set the next context as "running" + next_context.running = true; + // Set the CPU ID for the next context + next_context.cpu_id = Some(cpu_id); + + let percpu = PercpuBlock::current(); + unsafe { + percpu.switch_internals.set_current_context(Arc::clone( + ArcRwSpinlockWriteGuard::rwlock(&next_context_guard), + )); } - for (pid, context_lock) in contexts.iter() { - if *pid > (*from_ptr).id { - let mut context = context_lock.write(); - if runnable(&mut context, cpu_id) { - to_ptr = context.deref_mut() as *mut Context; - if (&mut *to_ptr).ksig.is_none() { - to_sig = context.pending.pop_front(); - } - break; - } - } + // FIXME set the switch result in arch::switch_to instead + let prev_context = + unsafe { mem::transmute::<&'_ mut Context, &'_ mut Context>(&mut *prev_context_guard) }; + let next_context = + unsafe { mem::transmute::<&'_ mut Context, &'_ mut Context>(&mut *next_context_guard) }; + + percpu + .switch_internals + .switch_result + .set(Some(SwitchResultInner { + _prev_guard: prev_context_guard, + _next_guard: next_context_guard, + })); + + /*let (ptrace_session, ptrace_flags) = if let Some((session, bp)) = ptrace::sessions() + .get(&next_context.pid) + .map(|s| (Arc::downgrade(s), s.data.lock().breakpoint)) + { + (Some(session), bp.map_or(PtraceFlags::empty(), |f| f.flags)) + } else { + (None, PtraceFlags::empty()) + };*/ + let ptrace_flags = PtraceFlags::empty(); + + //*percpu.ptrace_session.borrow_mut() = ptrace_session; + percpu.ptrace_flags.set(ptrace_flags); + prev_context.inside_syscall = percpu.inside_syscall.replace(next_context.inside_syscall); + + #[cfg(feature = "syscall_debug")] + { + prev_context.syscall_debug_info = percpu + .syscall_debug_info + .replace(next_context.syscall_debug_info); + prev_context.syscall_debug_info.on_switch_from(); + next_context.syscall_debug_info.on_switch_to(); + } + + percpu + .switch_internals + .being_sigkilled + .set(next_context.being_sigkilled); + + unsafe { + arch::switch_to(prev_context, next_context); } - if to_ptr as usize == 0 { - for (pid, context_lock) in contexts.iter() { - if *pid < (*from_ptr).id { - let mut context = context_lock.write(); - if runnable(&mut context, cpu_id) { - to_ptr = context.deref_mut() as *mut Context; - if (&mut *to_ptr).ksig.is_none() { - to_sig = context.pending.pop_front(); - } - break; - } - } + // NOTE: After switch_to is called, the return address can even be different from the + // current return address, meaning that we cannot use local variables here, and that we + // need to use the `switch_finish_hook` to be able to release the locks. Newly created + // contexts will return directly to the function pointer passed to context::spawn, and not + // reach this code until the next context switch back. 
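// Illustrative sketch: the candidate order used by `switch()` above is every context after
// the current one, then every context before it, and finally the idle context as a last
// resort. A minimal stand-alone model of that wrap-around order over plain indices (the
// kernel iterates ordered ranges of ContextRef instead):
fn round_robin_candidates(current: usize, len: usize) -> impl Iterator<Item = usize> {
    // Indices after `current`, then indices before it; `current` itself is excluded,
    // mirroring the Bound::Excluded ranges above. The idle context is appended by the caller.
    (current + 1..len).chain(0..current)
}
// e.g. with len == 4 and current == 2, the order is 3, 0, 1 (then the idle context).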
+ #[cfg(feature = "sys_stat")] + { + if next_context.userspace { + percpu.stats.set_state(cpu_stats::CpuState::User); + } else { + percpu.stats.set_state(cpu_stats::CpuState::Kernel); } } - }; - // Switch process states, TSS stack pointer, and store new context ID - if to_ptr as usize != 0 { - (&mut *from_ptr).running = false; - (&mut *to_ptr).running = true; - if let Some(ref stack) = (*to_ptr).kstack { - gdt::set_tss_stack(stack.as_ptr() as usize + stack.len()); + SwitchResult::Switched + } else { + // No target was found, unset global lock and return + arch::CONTEXT_SWITCH_LOCK.store(false, Ordering::SeqCst); + + #[cfg(feature = "sys_stat")] + { + percpu.stats.set_state(cpu_stats::CpuState::Idle); } - gdt::set_tcb((&mut *to_ptr).id.into()); - CONTEXT_ID.store((&mut *to_ptr).id, Ordering::SeqCst); + + SwitchResult::AllContextsIdle } +} - // Unset global lock before switch, as arch is only usable by the current CPU at this time - arch::CONTEXT_SWITCH_LOCK.store(false, Ordering::SeqCst); +/// Holds per-CPU state necessary for context switching. +/// +/// This struct contains information such as the idle context, current context, and PIT tick counts, +/// as well as fields required for managing ptrace sessions and signals. +#[derive(Default)] +pub struct ContextSwitchPercpu { + switch_result: Cell>, + pit_ticks: Cell, - if to_ptr as usize == 0 { - // No target was found, return + current_ctxt: RefCell>>>, - false - } else { - if let Some(sig) = to_sig { - // Signal was found, run signal handler + /// The idle process. + idle_ctxt: RefCell>>>, - //TODO: Allow nested signals - assert!((&mut *to_ptr).ksig.is_none()); + pub(crate) being_sigkilled: Cell, +} - let arch = (&mut *to_ptr).arch.clone(); - let kfx = (&mut *to_ptr).kfx.clone(); - let kstack = (&mut *to_ptr).kstack.clone(); - (&mut *to_ptr).ksig = Some((arch, kfx, kstack, sig)); - (&mut *to_ptr).arch.signal_stack(signal_handler, sig); - } +impl ContextSwitchPercpu { + /// Applies a function to the current context, allowing controlled access. + /// + /// # Parameters + /// - `f`: A closure that receives a reference to the current context and returns a value. + /// + /// # Returns + /// The result of applying `f` to the current context. + pub fn with_context(&self, f: impl FnOnce(&Arc>) -> T) -> T { + f(self + .current_ctxt + .borrow() + .as_ref() + .expect("not inside of context")) + } - (&mut *from_ptr).arch.switch_to(&mut (&mut *to_ptr).arch); + /// Applies a function to the current context, allowing controlled access. + /// + /// # Parameters + /// - `f`: A closure that receives a reference to the current context and returns a value. + /// + /// # Returns + /// The result of applying `f` to the current context if any. + pub fn try_with_context(&self, f: impl FnOnce(Option<&Arc>>) -> T) -> T { + f(self.current_ctxt.borrow().as_ref()) + } + + /// Sets the current context to a new value. + /// + /// # Safety + /// This function is unsafe as it modifies the context state directly. + /// + /// # Parameters + /// - `new`: The new context to be set as the current context. + pub unsafe fn set_current_context(&self, new: Arc>) { + *self.current_ctxt.borrow_mut() = Some(new); + } + + /// Sets the idle context to a new value. + /// + /// # Safety + /// This function is unsafe as it modifies the idle context state directly. + /// + /// # Parameters + /// - `new`: The new context to be set as the idle context. 
+ pub unsafe fn set_idle_context(&self, new: Arc>) { + *self.idle_ctxt.borrow_mut() = Some(new); + } - true + /// Retrieves the current idle context. + /// + /// # Returns + /// A reference to the idle context. + pub fn idle_context(&self) -> Arc> { + Arc::clone( + self.idle_ctxt + .borrow() + .as_ref() + .expect("no idle context present"), + ) } } diff --git a/src/context/timeout.rs b/src/context/timeout.rs index c26eb63a..a1c23d5b 100644 --- a/src/context/timeout.rs +++ b/src/context/timeout.rs @@ -1,18 +1,22 @@ use alloc::collections::VecDeque; -use spin::{Once, Mutex, MutexGuard}; +use spin::{Mutex, MutexGuard, Once}; -use crate::event; -use crate::scheme::SchemeId; -use crate::syscall::data::TimeSpec; -use crate::syscall::flag::{CLOCK_MONOTONIC, CLOCK_REALTIME, EVENT_READ}; -use crate::time; +use crate::{ + event, + scheme::SchemeId, + syscall::{ + data::TimeSpec, + flag::{CLOCK_MONOTONIC, CLOCK_REALTIME, EVENT_READ}, + }, + time, +}; #[derive(Debug)] struct Timeout { pub scheme_id: SchemeId, pub event_id: usize, pub clock: usize, - pub time: (u64, u64), + pub time: u128, } type Registry = VecDeque; @@ -32,10 +36,10 @@ fn registry() -> MutexGuard<'static, Registry> { pub fn register(scheme_id: SchemeId, event_id: usize, clock: usize, time: TimeSpec) { let mut registry = registry(); registry.push_back(Timeout { - scheme_id: scheme_id, - event_id: event_id, - clock: clock, - time: (time.tv_sec as u64, time.tv_nsec as u64) + scheme_id, + event_id, + clock, + time: (time.tv_sec as u128 * time::NANOS_PER_SEC) + (time.tv_nsec as u128), }); } @@ -50,12 +54,12 @@ pub fn trigger() { let trigger = match registry[i].clock { CLOCK_MONOTONIC => { let time = registry[i].time; - mono.0 > time.0 || (mono.0 == time.0 && mono.1 >= time.1) - }, + mono >= time + } CLOCK_REALTIME => { let time = registry[i].time; - real.0 > time.0 || (real.0 == time.0 && real.1 >= time.1) - }, + real >= time + } clock => { println!("timeout::trigger: unknown clock {}", clock); true diff --git a/src/cpu_set.rs b/src/cpu_set.rs new file mode 100644 index 00000000..80060f94 --- /dev/null +++ b/src/cpu_set.rs @@ -0,0 +1,126 @@ +use core::sync::atomic::{AtomicUsize, Ordering}; + +use alloc::string::{String, ToString}; + +/// A unique number used internally by the kernel to identify CPUs. +/// +/// This is usually but not necessarily the same as the APIC ID. + +// TODO: Differentiate between logical CPU IDs and hardware CPU IDs (e.g. APIC IDs) +#[derive(Clone, Copy, Eq, PartialEq, Hash)] +// TODO: NonMaxUsize? +// TODO: Optimize away this type if not cfg!(feature = "multi_core") +pub struct LogicalCpuId(u32); + +impl LogicalCpuId { + pub const BSP: Self = Self::new(0); + + pub const fn new(inner: u32) -> Self { + Self(inner) + } + pub const fn get(self) -> u32 { + self.0 + } +} + +impl core::fmt::Debug for LogicalCpuId { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "[logical cpu #{}]", self.0) + } +} +impl core::fmt::Display for LogicalCpuId { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "#{}", self.0) + } +} + +#[cfg(target_pointer_width = "64")] +pub const MAX_CPU_COUNT: u32 = 128; + +#[cfg(target_pointer_width = "32")] +pub const MAX_CPU_COUNT: u32 = 32; + +const SET_WORDS: usize = (MAX_CPU_COUNT / usize::BITS) as usize; + +// TODO: Support more than 128 CPUs. +// The maximum number of CPUs on Linux is configurable, and the type for LogicalCpuSet and +// LogicalCpuId may be optimized accordingly. 
In that case, box the mask if it's larger than some +// base size (probably 256 bytes). +#[derive(Debug)] +pub struct LogicalCpuSet([AtomicUsize; SET_WORDS]); + +fn parts(id: LogicalCpuId) -> (usize, u32) { + ((id.get() / usize::BITS) as usize, id.get() % usize::BITS) +} +impl LogicalCpuSet { + pub const fn empty() -> Self { + const ZEROES: AtomicUsize = AtomicUsize::new(0); + Self([ZEROES; SET_WORDS]) + } + pub const fn all() -> Self { + const ONES: AtomicUsize = AtomicUsize::new(!0); + Self([ONES; SET_WORDS]) + } + pub fn contains(&mut self, id: LogicalCpuId) -> bool { + let (word, bit) = parts(id); + *self.0[word].get_mut() & (1 << bit) != 0 + } + pub fn atomic_set(&self, id: LogicalCpuId) { + let (word, bit) = parts(id); + let _ = self.0[word].fetch_or(1 << bit, Ordering::Release); + } + pub fn atomic_clear(&self, id: LogicalCpuId) { + let (word, bit) = parts(id); + let _ = self.0[word].fetch_and(!(1 << bit), Ordering::Release); + } + + pub fn override_from(&mut self, raw: &RawMask) { + self.0 = raw.map(AtomicUsize::new); + } + pub fn to_raw(&self) -> RawMask { + self.0.each_ref().map(|w| w.load(Ordering::Acquire)) + } + + pub fn iter_mut(&mut self) -> impl Iterator + '_ { + // TODO: Will this be optimized away? + self.0.iter_mut().enumerate().flat_map(move |(i, w)| { + (0..usize::BITS).filter_map(move |b| { + if *w.get_mut() & 1 << b != 0 { + Some(LogicalCpuId::new(i as u32 * usize::BITS + b)) + } else { + None + } + }) + }) + } +} + +impl ToString for LogicalCpuSet { + fn to_string(&self) -> String { + use core::fmt::Write; + + let cpu_count = crate::cpu_count(); + + let mut ret = String::new(); + let raw = self.to_raw(); + let words = raw.get(..(cpu_count / usize::BITS) as usize).unwrap_or(&[]); + for (i, word) in words.iter().enumerate() { + if i != 0 { + write!(ret, "_").unwrap(); + } + let word = if i == words.len() - 1 { + *word & ((1_usize << (cpu_count % usize::BITS)) - 1) + } else { + *word + }; + write!(ret, "{word:x}").unwrap(); + } + ret + } +} + +pub type RawMask = [usize; SET_WORDS]; + +pub fn mask_as_bytes(mask: &RawMask) -> &[u8] { + unsafe { core::slice::from_raw_parts(mask.as_ptr().cast(), core::mem::size_of::()) } +} diff --git a/src/cpu_stats.rs b/src/cpu_stats.rs new file mode 100644 index 00000000..60806e9e --- /dev/null +++ b/src/cpu_stats.rs @@ -0,0 +1,146 @@ +use core::sync::atomic::{AtomicU64, AtomicU8, AtomicUsize, Ordering}; + +use alloc::{string::String, vec::Vec}; + +use crate::cpu_set::LogicalCpuId; + +/// The number of times (overall) where a CPU switched from one context to another. +static CONTEXT_SWITCH_COUNT: AtomicU64 = AtomicU64::new(0); +/// Number of times each Interrupt happened. +static IRQ_COUNT: [AtomicU64; 256] = [const { AtomicU64::new(0) }; 256]; +/// Number of contexts that were created. +static CONTEXTS_COUNT: AtomicU64 = AtomicU64::new(0); + +/// Current state of a CPU +#[repr(u8)] +#[derive(Copy, Clone, Debug, Default)] +pub enum CpuState { + /// Waiting for runnable context + #[default] + Idle = 0, + /// Runnnig a kernel context + Kernel = 1, + /// Running a context in the userspace + User = 2, +} + +/// Statistics for the CPUs. 
+#[derive(Debug, Default)] +pub struct CpuStats { + /// Number of ticks spent on userspace contexts + user: AtomicUsize, + /// Number of ticks spent on Niced userspace contexts + nice: AtomicUsize, + /// Number of ticks spent on kernel contexts + kernel: AtomicUsize, + /// Number of ticks spent idle + idle: AtomicUsize, + /// Number of times the CPU handled an interrupt + irq: AtomicUsize, + /// Current state of the CPU + state: AtomicU8, +} + +pub struct CpuStatsData { + /// Number of ticks spent on userspace contexts + pub user: usize, + /// Number of ticks spent on Niced userspace contexts + pub nice: usize, + /// Number of ticks spent on kernel contexts + pub kernel: usize, + /// Number of ticks spent idle + pub idle: usize, + /// Number of times the CPU handled an interrupt + pub irq: usize, +} + +impl CpuStats { + /// Set the CPU's current state + /// + /// # Parameters + /// * `new_state` - The state of the CPU for the following ticks. + pub fn set_state(&self, new_state: CpuState) { + self.state.store(new_state as u8, Ordering::Relaxed); + } + + /// Increments time statistics of a CPU + /// + /// Which statistic is incremented depends on the [`State`] of the CPU. + /// + /// # Parameters + /// * `ticks` - NUmber of ticks to add. + pub fn add_time(&self, ticks: usize) { + match self.state.load(Ordering::Relaxed) { + val if val == CpuState::Idle as u8 => self.idle.fetch_add(ticks, Ordering::Relaxed), + val if val == CpuState::User as u8 => self.user.fetch_add(ticks, Ordering::Relaxed), + val if val == CpuState::Kernel as u8 => self.kernel.fetch_add(ticks, Ordering::Relaxed), + _ => unreachable!("all possible values are covered"), + }; + } + + /// Add an IRQ event to both the global count and the CPU that handled it. + /// + /// This should be called in all [`crate::arch::interrupt:irq::eoi`], + /// for all architectures. + /// + /// # Parameters + /// * `irq` - The ID of the interrupt that happened. + pub fn add_irq(&self, irq: u8) { + IRQ_COUNT[irq as usize].fetch_add(1, Ordering::Relaxed); + self.irq.fetch_add(1, Ordering::Relaxed); + } +} + +impl CpuStatsData { + pub fn to_string(&self, cpu_id: LogicalCpuId) -> String { + format!( + "cpu{} {} {} {} {} {}", + cpu_id.get(), + self.user, + self.nice, + self.kernel, + self.idle, + self.irq, + ) + } +} + +impl Into for &CpuStats { + fn into(self) -> CpuStatsData { + CpuStatsData { + user: self.user.load(Ordering::Relaxed), + nice: self.nice.load(Ordering::Relaxed), + kernel: self.kernel.load(Ordering::Relaxed), + idle: self.idle.load(Ordering::Relaxed), + irq: self.irq.load(Ordering::Relaxed), + } + } +} + +/// Add a context switch to the count. +pub fn add_context_switch() { + CONTEXT_SWITCH_COUNT.fetch_add(1, Ordering::Relaxed); +} + +/// Get the number of context switches. +pub fn get_context_switch_count() -> u64 { + CONTEXT_SWITCH_COUNT.load(Ordering::Relaxed) +} + +/// Add a context creation to the count. +pub fn add_context() { + CONTEXTS_COUNT.fetch_add(1, Ordering::Relaxed); +} + +/// Get the number of contexts created. +pub fn get_contexts_count() -> u64 { + CONTEXTS_COUNT.load(Ordering::Relaxed) +} + +/// Get the count of each interrupt. 
+pub fn irq_counts() -> Vec { + IRQ_COUNT + .iter() + .map(|count| count.load(Ordering::Relaxed)) + .collect() +} diff --git a/src/debugger.rs b/src/debugger.rs new file mode 100644 index 00000000..dd4e0e9e --- /dev/null +++ b/src/debugger.rs @@ -0,0 +1,482 @@ +use crate::{ + context::Context, + paging::{RmmA, RmmArch, TableKind, PAGE_SIZE}, +}; +use spinning_top::RwSpinlock; + +//TODO: combine arches into one function (aarch64 one is newest) + +// Super unsafe due to page table switching and raw pointers! +#[cfg(target_arch = "aarch64")] +pub unsafe fn debugger(target_id: Option) { + use hashbrown::HashSet; + + use crate::memory::{get_page_info, RefCount}; + + println!("DEBUGGER START"); + println!(); + + let mut tree = HashMap::new(); + + let old_table = RmmA::table(TableKind::User); + + let mut spaces = HashSet::new(); + + for (id, context_lock) in crate::context::contexts().iter() { + if target_id.map_or(false, |target_id| *id != target_id) { + continue; + } + let context = context_lock.read(); + println!("{}: {}", (*id).get(), context.name); + + println!("status: {:?}", context.status); + if !context.status_reason.is_empty() { + println!("reason: {}", context.status_reason); + } + + // Switch to context page table to ensure syscall debug and stack dump will work + if let Some(ref space) = context.addr_space { + let new_as = spaces.insert(space.acquire_read().table.utable.table().phys().data()); + + RmmA::set_table( + TableKind::User, + space.acquire_read().table.utable.table().phys(), + ); + check_consistency(&mut *space.acquire_write(), new_as, &mut tree); + + if let Some([a, b, c, d, e, f]) = context.current_syscall() { + println!( + "syscall: {}", + crate::syscall::debug::format_call(a, b, c, d, e, f) + ); + } + + { + let space = space.acquire_read(); + if !space.grants.is_empty() { + println!("grants:"); + for (base, grant) in space.grants.iter() { + println!( + " virt 0x{:016x}:0x{:016x} size 0x{:08x} {:?}", + base.start_address().data(), + base.next_by(grant.page_count() - 1).start_address().data() + 0xFFF, + grant.page_count() * PAGE_SIZE, + grant.provider, + ); + } + } + } + + if let Some(regs) = context.regs() { + println!("regs:"); + regs.dump(); + + let mut sp = regs.iret.sp_el0; + println!("stack: {:>016x}", sp); + //Maximum 64 usizes + for _ in 0..64 { + if context.addr_space.as_ref().map_or(false, |space| { + space + .acquire_read() + .table + .utable + .translate(crate::paging::VirtualAddress::new(sp)) + .is_some() + }) { + let value = *(sp as *const usize); + println!(" {:>016x}: {:>016x}", sp, value); + if let Some(next_sp) = sp.checked_add(core::mem::size_of::()) { + sp = next_sp; + } else { + println!(" {:>016x}: OVERFLOW", sp); + break; + } + } else { + println!(" {:>016x}: GUARD PAGE", sp); + break; + } + } + } + + // Switch to original page table + RmmA::set_table(TableKind::User, old_table); + } + + println!(); + } + for (frame, (count, p)) in tree { + let rc = get_page_info(frame).unwrap().refcount(); + let c = match rc { + RefCount::Zero => 0, + RefCount::One => 1, + RefCount::Cow(c) => c.get(), + RefCount::Shared(s) => s.get(), + }; + if c < count { + println!("undercounted frame {:?} ({} < {})", frame, c, count); + } + } + + println!("DEBUGGER END"); +} + +// Super unsafe due to page table switching and raw pointers! 
+#[cfg(target_arch = "x86")] +pub unsafe fn debugger(target_id: Option) { + println!("DEBUGGER START"); + println!(); + + let old_table = RmmA::table(TableKind::User); + + for (id, context_lock) in crate::context::contexts().iter() { + if target_id.map_or(false, |target_id| *id != target_id) { + continue; + } + let context = context_lock.read(); + println!("{}: {}", (*id).get(), context.name); + + // Switch to context page table to ensure syscall debug and stack dump will work + if let Some(ref space) = context.addr_space { + RmmA::set_table( + TableKind::User, + space.acquire_read().table.utable.table().phys(), + ); + //TODO check_consistency(&mut space.write()); + } + + println!("status: {:?}", context.status); + if !context.status_reason.is_empty() { + println!("reason: {}", context.status_reason); + } + if let Some([a, b, c, d, e, f]) = context.current_syscall() { + println!( + "syscall: {}", + crate::syscall::debug::format_call(a, b, c, d, e, f) + ); + } + if let Some(ref addr_space) = context.addr_space { + let addr_space = addr_space.acquire_read(); + if !addr_space.grants.is_empty() { + println!("grants:"); + for (base, grant) in addr_space.grants.iter() { + println!( + " virt 0x{:08x}:0x{:08x} size 0x{:08x} {:?}", + base.start_address().data(), + base.next_by(grant.page_count()).start_address().data() + 0xFFF, + grant.page_count() * crate::memory::PAGE_SIZE, + grant.provider, + ); + } + } + } + if let Some(regs) = context.regs() { + println!("regs:"); + regs.dump(); + + let mut sp = regs.iret.esp; + println!("stack: {:>08x}", sp); + //Maximum 64 dwords + for _ in 0..64 { + if context.addr_space.as_ref().map_or(false, |space| { + space + .acquire_read() + .table + .utable + .translate(crate::paging::VirtualAddress::new(sp)) + .is_some() + }) { + let value = *(sp as *const usize); + println!(" {:>08x}: {:>08x}", sp, value); + if let Some(next_sp) = sp.checked_add(core::mem::size_of::()) { + sp = next_sp; + } else { + println!(" {:>08x}: OVERFLOW", sp); + break; + } + } else { + println!(" {:>08x}: GUARD PAGE", sp); + break; + } + } + } + + // Switch to original page table + RmmA::set_table(TableKind::User, old_table); + + println!(); + } + + println!("DEBUGGER END"); +} + +// Super unsafe due to page table switching and raw pointers! 
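// Illustrative sketch: the per-arch debugger functions in this file (see the TODO above about
// combining them) all dump the user stack the same way: a word is dereferenced only after its
// page translates, and the pointer advances with checked_add so the walk stops on overflow.
// A stand-alone model of that common loop, with the page-table lookup abstracted into a
// caller-supplied closure:
unsafe fn dump_stack_words(mut sp: usize, is_mapped: impl Fn(usize) -> bool) {
    // Dump at most 64 words, matching the per-arch loops.
    for _ in 0..64 {
        if !is_mapped(sp) {
            println!("    {:>016x}: GUARD PAGE", sp);
            break;
        }
        let value = unsafe { *(sp as *const usize) };
        println!("    {:>016x}: {:>016x}", sp, value);
        match sp.checked_add(core::mem::size_of::<usize>()) {
            Some(next) => sp = next,
            None => {
                println!("    {:>016x}: OVERFLOW", sp);
                break;
            }
        }
    }
}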
+#[cfg(target_arch = "x86_64")] +pub unsafe fn debugger(target_id: Option<*const RwSpinlock>) { + use core::sync::atomic::Ordering; + + use alloc::sync::Arc; + use hashbrown::HashSet; + + use crate::memory::{get_page_info, the_zeroed_frame, RefCount}; + + unsafe { + x86::bits64::rflags::stac(); + } + + println!("DEBUGGER START"); + println!(); + + let mut tree = HashMap::new(); + let mut spaces = HashSet::new(); + + let mut temporarily_taken_htbufs = 0; + + tree.insert(the_zeroed_frame().0, (1, false)); + + let old_table = RmmA::table(TableKind::User); + + for context_lock in crate::context::contexts().iter() { + if target_id.map_or(false, |target_id| Arc::as_ptr(&context_lock.0) != target_id) { + continue; + } + let context = context_lock.0.read(); + println!("{:p}: {}", Arc::as_ptr(&context_lock.0), context.name); + + if let Some(ref head) = context.syscall_head { + tree.insert(head.get(), (1, false)); + } else { + temporarily_taken_htbufs += 1; + } + if let Some(ref tail) = context.syscall_tail { + tree.insert(tail.get(), (1, false)); + } else { + temporarily_taken_htbufs += 1; + } + + // Switch to context page table to ensure syscall debug and stack dump will work + if let Some(ref space) = context.addr_space { + let was_new = spaces.insert(space.acquire_read().table.utable.table().phys().data()); + RmmA::set_table( + TableKind::User, + space.acquire_read().table.utable.table().phys(), + ); + check_consistency(&mut space.acquire_write(), was_new, &mut tree); + } + + println!("status: {:?}", context.status); + if !context.status_reason.is_empty() { + println!("reason: {}", context.status_reason); + } + if let Some([a, b, c, d, e, f]) = context.current_syscall() { + println!( + "syscall: {}", + crate::syscall::debug::format_call(a, b, c, d, e, f) + ); + } + if let Some(ref addr_space) = context.addr_space { + let addr_space = addr_space.acquire_read(); + if !addr_space.grants.is_empty() { + println!("grants:"); + for (base, info) in addr_space.grants.iter() { + let size = info.page_count() * PAGE_SIZE; + println!( + " virt 0x{:016x}:0x{:016x} size 0x{:08x} {:?}", + base.start_address().data(), + base.start_address().data() + size - 1, + size, + info.provider, + ); + } + } + } + if let Some(regs) = context.regs() { + println!("regs:"); + regs.dump(); + + let mut rsp = regs.iret.rsp; + println!("stack: {:>016x}", rsp); + //Maximum 64 qwords + for _ in 0..64 { + if context.addr_space.as_ref().map_or(false, |space| { + space + .acquire_read() + .table + .utable + .translate(crate::paging::VirtualAddress::new(rsp)) + .is_some() + }) { + let value = *(rsp as *const usize); + println!(" {:>016x}: {:>016x}", rsp, value); + if let Some(next_rsp) = rsp.checked_add(core::mem::size_of::()) { + rsp = next_rsp; + } else { + println!(" {:>016x}: OVERFLOW", rsp); + break; + } + } else { + println!(" {:>016x}: GUARD PAGE", rsp); + break; + } + } + } + + // Switch to original page table + RmmA::set_table(TableKind::User, old_table); + + println!(); + } + crate::scheme::proc::foreach_addrsp(|addrsp| { + let was_new = spaces.insert(addrsp.acquire_read().table.utable.table().phys().data()); + check_consistency(&mut *addrsp.acquire_write(), was_new, &mut tree); + }); + for (frame, (count, p)) in tree { + let Some(info) = get_page_info(frame) else { + assert!(p); + continue; + }; + let rc = info.refcount(); + let (c, s) = match rc { + None => (0, false), + Some(RefCount::One) => (1, false), + Some(RefCount::Cow(c)) => (c.get(), false), + Some(RefCount::Shared(s)) => (s.get(), true), + }; + if c != count { + 
println!( + "frame refcount mismatch for {:?} ({} != {} s {})", + frame, c, count, s + ); + } + } + println!( + "({} kernel-owned references were not counted)", + temporarily_taken_htbufs + ); + + println!("DEBUGGER END"); + unsafe { + x86::bits64::rflags::clac(); + } +} +#[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] +use {crate::memory::Frame, hashbrown::HashMap}; + +#[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] +pub unsafe fn check_consistency( + addr_space: &mut crate::context::memory::AddrSpace, + new_as: bool, + tree: &mut HashMap, +) { + use crate::{ + context::memory::{PageSpan, Provider}, + memory::{get_page_info, RefCount}, + paging::*, + }; + + let p4 = addr_space.table.utable.table(); + + for p4i in 0..256 { + let p3 = match p4.next(p4i) { + Some(p3) => p3, + None => continue, + }; + + for p3i in 0..512 { + let p2 = match p3.next(p3i) { + Some(p2) => p2, + None => continue, + }; + + for p2i in 0..512 { + let p1 = match p2.next(p2i) { + Some(p1) => p1, + None => continue, + }; + + for p1i in 0..512 { + let (physaddr, flags) = match p1.entry(p1i) { + Some(e) => { + if let Ok(address) = e.address() { + (address, e.flags()) + } else { + continue; + } + } + _ => continue, + }; + let address = + VirtualAddress::new((p1i << 12) | (p2i << 21) | (p3i << 30) | (p4i << 39)); + + let (base, grant) = match addr_space + .grants + .contains(Page::containing_address(address)) + { + Some(g) => g, + None => { + log::error!( + "ADDRESS {:p} LACKING GRANT BUT MAPPED TO {:#0x} FLAGS {:?}!", + address.data() as *const u8, + physaddr.data(), + flags + ); + continue; + } + }; + + const EXCLUDE: usize = (1 << 5) | (1 << 6); // accessed+dirty+writable + if grant.flags().write(false).data() & !EXCLUDE + != flags.write(false).data() & !EXCLUDE + { + log::error!( + "FLAG MISMATCH: {:?} != {:?}, address {:p} in grant at {:?}", + grant.flags(), + flags, + address.data() as *const u8, + PageSpan::new(base, grant.page_count()) + ); + } + let p = matches!( + grant.provider, + Provider::PhysBorrowed { .. } + | Provider::External { .. } + | Provider::FmapBorrowed { .. } + ); + let frame = Frame::containing(physaddr); + if new_as { + tree.entry(frame).or_insert((0, p)).0 += 1; + } + + if let Some(page) = get_page_info(frame) { + match page.refcount() { + None => panic!("mapped page with zero refcount"), + + Some(RefCount::One | RefCount::Shared(_)) => assert!( + !(flags.has_write() && !grant.flags().has_write()), + "page entry has higher permissions than grant!" 
+ ), + Some(RefCount::Cow(_)) => { + assert!(!flags.has_write(), "directly writable CoW page!") + } + } + } else { + //println!("!OWNED {:?}", frame); + } + } + } + } + } + + /*for (base, info) in addr_space.grants.iter() { + let span = PageSpan::new(base, info.page_count()); + for page in span.pages() { + let _entry = match addr_space.table.utable.translate(page.start_address()) { + Some(e) => e, + None => { + log::error!("GRANT AT {:?} LACKING MAPPING AT PAGE {:p}", span, page.start_address().data() as *const u8); + continue; + } + }; + } + }*/ + println!("Consistency appears correct"); +} diff --git a/src/devices/graphical_debug/debug.rs b/src/devices/graphical_debug/debug.rs new file mode 100644 index 00000000..f452dd3f --- /dev/null +++ b/src/devices/graphical_debug/debug.rs @@ -0,0 +1,97 @@ +use core::{cmp, ptr}; + +use super::Display; + +static FONT: &[u8] = include_bytes!("../../../res/unifont.font"); + +pub struct DebugDisplay { + pub(super) display: Display, + x: usize, + y: usize, + w: usize, + h: usize, +} + +impl DebugDisplay { + pub(super) fn new(display: Display) -> DebugDisplay { + let w = display.width / 8; + let h = display.height / 16; + DebugDisplay { + display, + x: 0, + y: 0, + w, + h, + } + } + + fn write_char(&mut self, c: char) { + if self.x >= self.w || c == '\n' { + self.x = 0; + self.y += 1; + } + + if self.y >= self.h { + let new_y = self.h - 1; + let d_y = self.y - new_y; + + self.scroll(d_y * 16); + + unsafe { + self.display + .sync(0, 0, self.display.width, self.display.height); + } + + self.y = new_y; + } + + if c != '\n' { + self.char(self.x * 8, self.y * 16, c, 0xFFFFFF); + + unsafe { + self.display.sync(self.x * 8, self.y * 16, 8, 16); + } + + self.x += 1; + } + } + + pub fn write(&mut self, buf: &[u8]) { + for &b in buf { + self.write_char(b as char); + } + } + + /// Draw a character + fn char(&mut self, x: usize, y: usize, character: char, color: u32) { + if x + 8 <= self.display.width && y + 16 <= self.display.height { + let mut dst = unsafe { self.display.data_mut().add(y * self.display.stride + x) }; + + let font_i = 16 * (character as usize); + if font_i + 16 <= FONT.len() { + for row in 0..16 { + let row_data = FONT[font_i + row]; + for col in 0..8 { + if (row_data >> (7 - col)) & 1 == 1 { + unsafe { + *dst.add(col) = color; + } + } + } + dst = unsafe { dst.add(self.display.stride) }; + } + } + } + } + + /// Scroll the screen + fn scroll(&mut self, lines: usize) { + let offset = cmp::min(self.display.height, lines) * self.display.stride; + let size = (self.display.stride * self.display.height) - offset; + unsafe { + let ptr = self.display.data_mut(); + ptr::copy(ptr.add(offset), ptr, size); + ptr::write_bytes(ptr.add(size), 0, offset); + } + } +} diff --git a/src/devices/graphical_debug/display.rs b/src/devices/graphical_debug/display.rs new file mode 100644 index 00000000..0cd57920 --- /dev/null +++ b/src/devices/graphical_debug/display.rs @@ -0,0 +1,62 @@ +use alloc::boxed::Box; +use core::{ptr, slice}; + +/// A display +pub(super) struct Display { + pub(super) width: usize, + pub(super) height: usize, + pub(super) stride: usize, + onscreen_ptr: *mut u32, + offscreen: Option>, +} + +unsafe impl Send for Display {} + +impl Display { + pub(super) fn new( + width: usize, + height: usize, + stride: usize, + onscreen_ptr: *mut u32, + ) -> Display { + unsafe { + ptr::write_bytes(onscreen_ptr, 0, stride * height); + } + Display { + width, + height, + stride, + onscreen_ptr, + offscreen: None, + } + } + + pub(super) fn heap_init(&mut self) { + let 
onscreen = + unsafe { slice::from_raw_parts(self.onscreen_ptr, self.stride * self.height) }; + self.offscreen = Some(onscreen.to_vec().into_boxed_slice()); + } + + pub(super) fn data_mut(&mut self) -> *mut u32 { + match &mut self.offscreen { + Some(offscreen) => offscreen.as_mut_ptr(), + None => self.onscreen_ptr, + } + } + + /// Sync from offscreen to onscreen, unsafe because it trusts provided x, y, w, h + pub(super) unsafe fn sync(&mut self, x: usize, y: usize, w: usize, mut h: usize) { + if let Some(offscreen) = &self.offscreen { + let mut offset = y * self.stride + x; + while h > 0 { + ptr::copy( + offscreen.as_ptr().add(offset), + self.onscreen_ptr.add(offset), + w, + ); + offset += self.stride; + h -= 1; + } + } + } +} diff --git a/src/devices/graphical_debug/mod.rs b/src/devices/graphical_debug/mod.rs new file mode 100644 index 00000000..9591aba2 --- /dev/null +++ b/src/devices/graphical_debug/mod.rs @@ -0,0 +1,82 @@ +use core::str; +use spin::Mutex; + +pub use self::debug::DebugDisplay; +use self::display::Display; + +pub mod debug; +pub mod display; + +pub static DEBUG_DISPLAY: Mutex> = Mutex::new(None); + +pub static FRAMEBUFFER: Mutex<(usize, usize, usize)> = Mutex::new((0, 0, 0)); + +#[allow(unused)] +pub fn init(env: &[u8]) { + println!("Starting graphical debug"); + + let mut phys = 0; + let mut virt = 0; + let mut width = 0; + let mut height = 0; + let mut stride = 0; + + //TODO: should errors be reported? + for line in str::from_utf8(env).unwrap_or("").lines() { + let mut parts = line.splitn(2, '='); + let name = parts.next().unwrap_or(""); + let value = parts.next().unwrap_or(""); + + if name == "FRAMEBUFFER_ADDR" { + phys = usize::from_str_radix(value, 16).unwrap_or(0); + } + + if name == "FRAMEBUFFER_VIRT" { + virt = usize::from_str_radix(value, 16).unwrap_or(0); + } + + if name == "FRAMEBUFFER_WIDTH" { + width = usize::from_str_radix(value, 16).unwrap_or(0); + } + + if name == "FRAMEBUFFER_HEIGHT" { + height = usize::from_str_radix(value, 16).unwrap_or(0); + } + + if name == "FRAMEBUFFER_STRIDE" { + stride = usize::from_str_radix(value, 16).unwrap_or(0); + } + } + + *FRAMEBUFFER.lock() = (phys, virt, stride * height * 4); + + if phys == 0 || virt == 0 || width == 0 || height == 0 || stride == 0 { + println!("Framebuffer not found"); + return; + } + + println!( + "Framebuffer {}x{} stride {} at {:X} mapped to {:X}", + width, height, stride, phys, virt + ); + + { + let display = Display::new(width, height, stride, virt as *mut u32); + let debug_display = DebugDisplay::new(display); + *DEBUG_DISPLAY.lock() = Some(debug_display); + } +} + +#[allow(unused)] +pub fn init_heap() { + if let Some(debug_display) = &mut *DEBUG_DISPLAY.lock() { + debug_display.display.heap_init(); + } +} + +#[allow(unused)] +pub fn fini() { + DEBUG_DISPLAY.lock().take(); + + println!("Finished graphical debug"); +} diff --git a/src/devices/mod.rs b/src/devices/mod.rs index 9dc8d5bd..1b05f812 100644 --- a/src/devices/mod.rs +++ b/src/devices/mod.rs @@ -1 +1,3 @@ -pub mod uart_16550; \ No newline at end of file +#[cfg(feature = "graphical_debug")] +pub mod graphical_debug; +pub mod uart_16550; diff --git a/src/devices/uart_16550.rs b/src/devices/uart_16550.rs index c888c8a3..2a30a42a 100644 --- a/src/devices/uart_16550.rs +++ b/src/devices/uart_16550.rs @@ -1,4 +1,13 @@ -use crate::syscall::io::{Io, Pio, Mmio, ReadOnly}; +#![allow(unused)] + +use core::{ + convert::TryInto, + ptr::{addr_of, addr_of_mut}, +}; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use crate::syscall::io::Pio; 
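// Illustrative sketch of why the SerialPort code below goes through addr_of!/addr_of_mut!
// once the struct becomes #[repr(packed(4))]: taking an ordinary reference to a possibly
// misaligned field of a packed struct is undefined behavior, while creating a raw pointer
// in place and reading it unaligned is sound (the hunk's own FIXME notes the remaining
// unaligned-reference hazard). Hypothetical type, not the kernel's SerialPort:
#[repr(packed)]
struct PackedRegs {
    flag: u8,
    data: u32, // only 1-byte aligned inside the packed struct
}

fn read_data(regs: &PackedRegs) -> u32 {
    // addr_of! yields *const u32 without an intermediate &u32, so no alignment is assumed.
    unsafe { core::ptr::addr_of!(regs.data).read_unaligned() }
}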
+use crate::syscall::io::{Io, Mmio, ReadOnly}; bitflags! { /// Interrupt enable flags @@ -22,7 +31,8 @@ bitflags! { } #[allow(dead_code)] -pub struct SerialPort> { +#[repr(packed(4))] +pub struct SerialPort { /// Data register, read to receive, write to send data: T, /// Interrupt enable @@ -39,6 +49,7 @@ pub struct SerialPort> { modem_sts: ReadOnly, } +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] impl SerialPort> { pub const fn new(base: u16) -> SerialPort> { SerialPort { @@ -48,53 +59,67 @@ impl SerialPort> { line_ctrl: Pio::new(base + 3), modem_ctrl: Pio::new(base + 4), line_sts: ReadOnly::new(Pio::new(base + 5)), - modem_sts: ReadOnly::new(Pio::new(base + 6)) + modem_sts: ReadOnly::new(Pio::new(base + 6)), } } } +impl SerialPort> { + #[allow(dead_code)] + pub unsafe fn new(base: usize) -> &'static mut SerialPort> { + &mut *(base as *mut Self) + } +} + impl SerialPort> { - pub fn new(_base: usize) -> SerialPort> { - SerialPort { - data: Mmio::new(), - int_en: Mmio::new(), - fifo_ctrl: Mmio::new(), - line_ctrl: Mmio::new(), - modem_ctrl: Mmio::new(), - line_sts: ReadOnly::new(Mmio::new()), - modem_sts: ReadOnly::new(Mmio::new()) - } + #[allow(dead_code)] + pub unsafe fn new(base: usize) -> &'static mut SerialPort> { + &mut *(base as *mut Self) } } -impl> SerialPort { +impl SerialPort +where + T::Value: From + TryInto, +{ pub fn init(&mut self) { - //TODO: Cleanup - self.int_en.write(0x00); - self.line_ctrl.write(0x80); - self.data.write(0x01); - self.int_en.write(0x00); - self.line_ctrl.write(0x03); - self.fifo_ctrl.write(0xC7); - self.modem_ctrl.write(0x0B); - self.int_en.write(0x01); + unsafe { + //TODO: Cleanup + // FIXME: Fix UB if unaligned + (&mut *addr_of_mut!(self.int_en)).write(0x00.into()); + (&mut *addr_of_mut!(self.line_ctrl)).write(0x80.into()); + (&mut *addr_of_mut!(self.data)).write(0x01.into()); + (&mut *addr_of_mut!(self.int_en)).write(0x00.into()); + (&mut *addr_of_mut!(self.line_ctrl)).write(0x03.into()); + (&mut *addr_of_mut!(self.fifo_ctrl)).write(0xC7.into()); + (&mut *addr_of_mut!(self.modem_ctrl)).write(0x0B.into()); + (&mut *addr_of_mut!(self.int_en)).write(0x01.into()); + } } fn line_sts(&self) -> LineStsFlags { - LineStsFlags::from_bits_truncate(self.line_sts.read()) + LineStsFlags::from_bits_truncate( + (unsafe { &*addr_of!(self.line_sts) }.read() & 0xFF.into()) + .try_into() + .unwrap_or(0), + ) } pub fn receive(&mut self) -> Option { if self.line_sts().contains(LineStsFlags::INPUT_FULL) { - Some(self.data.read()) + Some( + (unsafe { &*addr_of!(self.data) }.read() & 0xFF.into()) + .try_into() + .unwrap_or(0), + ) } else { None } } pub fn send(&mut self, data: u8) { - while ! 
self.line_sts().contains(LineStsFlags::OUTPUT_EMPTY) {} - self.data.write(data); + while !self.line_sts().contains(LineStsFlags::OUTPUT_EMPTY) {} + unsafe { &mut *addr_of_mut!(self.data) }.write(data.into()) } pub fn write(&mut self, buf: &[u8]) { @@ -104,7 +129,11 @@ impl> SerialPort { self.send(8); self.send(b' '); self.send(8); - }, + } + b'\n' => { + self.send(b'\r'); + self.send(b'\n'); + } _ => { self.send(b); } diff --git a/src/dtb/irqchip.rs b/src/dtb/irqchip.rs new file mode 100644 index 00000000..2a1f554d --- /dev/null +++ b/src/dtb/irqchip.rs @@ -0,0 +1,393 @@ +use super::travel_interrupt_ctrl; +use crate::{arch::device::irqchip::new_irqchip, cpu_set::LogicalCpuId, scheme::irq::irq_trigger}; +use alloc::{boxed::Box, vec::Vec}; +use byteorder::{ByteOrder, BE}; +use fdt::{node::NodeProperty, Fdt}; +use log::{debug, error}; +use syscall::{Error, Result, EINVAL}; + +pub trait InterruptHandler { + fn irq_handler(&mut self, irq: u32); +} + +#[derive(Debug, Copy, Clone)] +#[allow(dead_code)] +pub enum IrqCell { + L1(u32), + L2(u32, u32), + L3(u32, u32, u32), +} + +pub trait InterruptController: InterruptHandler { + fn irq_init( + &mut self, + fdt_opt: Option<&Fdt>, + irq_desc: &mut [IrqDesc; 1024], + ic_idx: usize, + irq_idx: &mut usize, + ) -> Result<()>; + fn irq_ack(&mut self) -> u32; + fn irq_eoi(&mut self, irq_num: u32); + fn irq_enable(&mut self, irq_num: u32); + #[allow(unused)] + fn irq_disable(&mut self, irq_num: u32); + fn irq_xlate(&self, irq_data: IrqCell) -> Result; + fn irq_to_virq(&self, hwirq: u32) -> Option; +} + +pub struct IrqConnection { + pub parent_phandle: u32, + pub parent: usize, // parent idx in chiplist + pub parent_interrupt: Option, +} + +pub struct IrqChipItem { + pub phandle: u32, + pub parents: Vec, + pub children: Vec, // child idx in chiplist + pub ic: Box, +} + +pub struct IrqChipList { + pub chips: Vec, +} + +pub struct IrqDescItem { + pub idx: usize, + pub ic_idx: usize, //ic idx in irq chip list + pub child_ic_idx: Option, //ic idx in irq chip list + pub ic_irq: u32, //hwirq in ic + pub used: bool, +} + +pub struct IrqDesc { + pub basic: IrqDescItem, + pub handler: Option>, +} + +impl IrqChipList { + fn init_inner1(&mut self, fdt: &Fdt) { + for node in fdt.all_nodes() { + if node.property("interrupt-controller").is_some() { + let compatible = node.property("compatible").unwrap().as_str().unwrap(); + let phandle = node.property("phandle").unwrap().as_usize().unwrap() as u32; + let intr_cells = node.interrupt_cells().unwrap(); + + debug!( + "{}, compatible = {}, #interrupt-cells = 0x{:08x}, phandle = 0x{:08x}", + node.name, compatible, intr_cells, phandle + ); + let mut item = IrqChipItem { + phandle, + parents: Vec::new(), + children: Vec::new(), + ic: new_irqchip(compatible).unwrap(), + }; + + fn interrupt_address( + iter: &mut impl Iterator, + interrupt_cells: usize, + ) -> Option { + match interrupt_cells { + 1 => Some(IrqCell::L1(iter.next()?)), + 2 if let Ok([a, b]) = iter.next_chunk() => Some(IrqCell::L2(a, b)), + 3 if let Ok([a, b, c]) = iter.next_chunk() => Some(IrqCell::L3(a, b, c)), + _ => None, + } + } + + fn gate_interrupt_address(addr: IrqCell) -> Option { + match addr { + IrqCell::L1(u32::MAX) + | IrqCell::L2(u32::MAX, _) + | IrqCell::L3(u32::MAX, _, _) => None, + _ => Some(addr), + } + } + + if let Some(parent) = node.interrupt_parent() + && let Some(intr_data) = node.property("interrupts") + { + // FIXME use interrupts() helper when fixed (see gh#12) + let mut intr_data = intr_data.value.chunks(4).map(|x| BE::read_u32(x)); + let 
parent_phandle = parent + .property("phandle") + .and_then(NodeProperty::as_usize) + .unwrap() as u32; + let parent_interrupt_cells = parent.interrupt_cells().unwrap(); + debug!("interrupt-parent = 0x{:08x}", parent_phandle); + debug!("interrupts begin:"); + while let Some(parent_interrupt) = + interrupt_address(&mut intr_data, parent_interrupt_cells) + { + debug!("{:?}, ", parent_interrupt); + item.parents.push(IrqConnection { + parent_phandle, + parent: 0, + parent_interrupt: gate_interrupt_address(parent_interrupt), + }); + } + debug!("interrupts end"); + } else if let Some(intr_data) = node.property("interrupts-extended") { + // FIXME use the helper when fixed (see gh#37) + // Shouldn't matter much since ARM seems to not use extended interrupt and + // RISC-V seems to not use 3-sized interrupt addresses + let mut intr_data = intr_data.value.chunks(4).map(|x| BE::read_u32(x)); + while let Some(parent_phandle) = intr_data.next() + && let Some(parent) = fdt.find_phandle(parent_phandle) + && let Some(parent_interrupt_cells) = parent.interrupt_cells() + && let Some(parent_interrupt) = + interrupt_address(&mut intr_data, parent_interrupt_cells) + { + debug!("{:?}, ", parent_interrupt); + item.parents.push(IrqConnection { + parent_phandle, + parent: 0, + parent_interrupt: gate_interrupt_address(parent_interrupt), + }); + } + } + + self.chips.push(item); + } + } + } + + fn init_inner2(&mut self) -> Vec { + let mut roots = Vec::new(); + + for child_i in 0..self.chips.len() { + let child = &mut self.chips[child_i]; + let phandle = child.phandle; + + if child.parents.is_empty() { + roots.push(child_i); + continue; + } + + for conn_i in 0..child.parents.len() { + let parent_phandle = self.chips[child_i].parents[conn_i].parent_phandle; + let parent_i = self + .chips + .iter() + .position(|x| parent_phandle == x.phandle) + .unwrap_or_else(|| { + panic!( + "Cannot find parent intc {} (connection from {})", + parent_phandle, phandle + ) + }); + self.chips[child_i].parents[conn_i].parent = parent_i; + let parent = &mut self.chips[parent_i]; + if !parent.children.contains(&child_i) { + parent.children.push(child_i); + } + } + } + roots + } + + fn init_inner3( + &mut self, + fdt_opt: Option<&Fdt>, + irq_desc: &mut [IrqDesc; 1024], + mut queue: Vec, + ) { + //run init + let mut irq_idx: usize = 0; + let mut queue_idx = 0; + while queue_idx < queue.len() { + let cur_idx = queue[queue_idx]; + let cur_chip = &mut self.chips[cur_idx]; + for child in &cur_chip.children { + if let Some(child_pos) = queue.iter().position(|x| *child == *x) { + assert!( + child_pos > queue_idx, + "IRQ chip tree has a cycle with phandle {} in it", + cur_chip.phandle + ); + } else { + queue.push(*child); + } + } + cur_chip + .ic + .irq_init(fdt_opt, irq_desc, cur_idx, &mut irq_idx) + .expect("Failed to initialize irq chip"); + + let cur_chip = &self.chips[cur_idx]; + for connection in &cur_chip.parents { + debug_assert!(queue[0..queue_idx].contains(&connection.parent)); + if let Some(parent_interrupt) = connection.parent_interrupt { + let parent = &self.chips[connection.parent]; + if let Ok(virq) = parent.ic.irq_xlate(parent_interrupt) { + // assert is unused + irq_desc[virq].basic.child_ic_idx = Some(cur_idx); + } else { + error!( + "Cannot connect irq chip {} to parent irq {} : {:?}", + cur_chip.phandle, parent.phandle, parent_interrupt + ); + } + } + } + + queue_idx += 1; + } + } +} + +pub struct IrqChipCore { + //TODO: support multi level interrupt constrollers + pub irq_chip_list: IrqChipList, + pub irq_desc: [IrqDesc; 1024], 
+} + +impl IrqChipCore { + pub fn irq_eoi(&mut self, virq: u32) { + let irq_desc = &self.irq_desc[virq as usize]; + let ic_idx = irq_desc.basic.ic_idx; + let hwirq = irq_desc.basic.ic_irq; + + self.irq_chip_list.chips[ic_idx].ic.irq_eoi(hwirq) + } + + pub fn irq_enable(&mut self, virq: u32) { + let irq_desc = &self.irq_desc[virq as usize]; + let ic_idx = irq_desc.basic.ic_idx; + let hwirq = irq_desc.basic.ic_irq; + + self.irq_chip_list.chips[ic_idx].ic.irq_enable(hwirq) + } + + #[allow(unused)] + pub fn irq_disable(&mut self, virq: u32) { + let irq_desc = &self.irq_desc[virq as usize]; + let ic_idx = irq_desc.basic.ic_idx; + let hwirq = irq_desc.basic.ic_irq; + + self.irq_chip_list.chips[ic_idx].ic.irq_disable(hwirq) + } + + #[cfg(target_arch = "riscv64")] + pub fn irq_to_virq(&self, ic_idx: usize, hwirq: u32) -> Option { + self.irq_chip_list.chips[ic_idx].ic.irq_to_virq(hwirq) + } + + pub fn irq_xlate(&self, ic_idx: usize, irq_data: &[u32]) -> Result { + let irq_data = match irq_data.len() { + 1 => IrqCell::L1(irq_data[0]), + 2 => IrqCell::L2(irq_data[0], irq_data[1]), + 3 => IrqCell::L3(irq_data[0], irq_data[1], irq_data[2]), + _ => return Err(Error::new(EINVAL)), + }; + self.irq_chip_list.chips[ic_idx].ic.irq_xlate(irq_data) + } + + pub fn trigger_virq(&mut self, virq: u32) { + if virq < 1024 { + let desc = &mut self.irq_desc[virq as usize]; + if let Some(handler) = &mut desc.handler { + handler.irq_handler(virq); + } else if let Some(ic_idx) = desc.basic.child_ic_idx { + self.irq_chip_list.chips[ic_idx].ic.irq_handler(virq); + } else { + irq_trigger(virq as u8); + } + } + } + + pub fn init(&mut self, fdt_opt: Option<&Fdt>) { + for (i, desc) in self.irq_desc.iter_mut().enumerate() { + desc.basic.idx = i; + } + if let Some(fdt) = fdt_opt { + self.irq_chip_list.init_inner1(fdt); + } + let roots = self.irq_chip_list.init_inner2(); + self.irq_chip_list + .init_inner3(fdt_opt, &mut self.irq_desc, roots); + } + + pub fn phandle_to_ic_idx(&self, phandle: u32) -> Option { + self.irq_chip_list + .chips + .iter() + .position(|x| x.phandle == phandle) + } + + pub fn irq_iter_for(&self, ic_idx: u32) -> impl Iterator + '_ { + self.irq_desc.iter().filter_map(move |x| { + if x.basic.ic_idx == ic_idx as usize { + Some(x.basic.ic_irq as u8) + } else { + None + } + }) + } +} + +pub unsafe fn acknowledge(irq: usize) { + IRQ_CHIP.irq_eoi(irq as u32); +} + +const INIT_HANDLER: Option> = None; +const INIT_IRQ_DESC: IrqDesc = IrqDesc { + basic: IrqDescItem { + idx: 0, + ic_idx: 0, + ic_irq: 0, + child_ic_idx: None, + used: false, + }, + handler: INIT_HANDLER, +}; +pub static mut IRQ_CHIP: IrqChipCore = IrqChipCore { + irq_chip_list: IrqChipList { chips: Vec::new() }, + irq_desc: [INIT_IRQ_DESC; 1024], +}; + +pub fn init(fdt: &Fdt) { + travel_interrupt_ctrl(fdt); + unsafe { + IRQ_CHIP.init(Some(fdt)); + } +} + +pub fn register_irq(virq: u32, handler: Box) { + if virq >= 1024 { + error!("irq {} exceed 1024!!!", virq); + return; + } + + unsafe { + if IRQ_CHIP.irq_desc[virq as usize].handler.is_some() { + error!("irq {} has already been registered!", virq); + return; + } + + IRQ_CHIP.irq_desc[virq as usize].handler = Some(handler); + } +} + +#[inline] +pub fn is_reserved(_cpu_id: LogicalCpuId, index: u8) -> bool { + unsafe { IRQ_CHIP.irq_desc[index as usize].basic.used } +} + +#[inline] +pub fn set_reserved(_cpu_id: LogicalCpuId, index: u8, reserved: bool) { + unsafe { + IRQ_CHIP.irq_desc[index as usize].basic.used = reserved; + if reserved { + IRQ_CHIP.irq_enable(index as u32); + } else { + 
IRQ_CHIP.irq_enable(index as u32); + } + } +} + +pub fn available_irqs_iter(_cpu_id: LogicalCpuId) -> impl Iterator + 'static { + error!("available_irqs_iter has been called"); + 0..0 +} diff --git a/src/dtb/mod.rs b/src/dtb/mod.rs new file mode 100644 index 00000000..68a9dda5 --- /dev/null +++ b/src/dtb/mod.rs @@ -0,0 +1,235 @@ +pub mod irqchip; + +use crate::{ + dtb::irqchip::IrqCell, + startup::memory::{register_memory_region, BootloaderMemoryKind}, +}; +use alloc::vec::Vec; +use byteorder::{ByteOrder, BE}; +use core::slice; +use fdt::{ + node::{FdtNode, NodeProperty}, + standard_nodes::MemoryRegion, + Fdt, +}; +use log::debug; +use spin::once::Once; + +pub static DTB_BINARY: Once> = Once::new(); + +pub unsafe fn init(dtb: Option<(usize, usize)>) { + let mut initialized = false; + DTB_BINARY.call_once(|| { + initialized = true; + + let mut binary = Vec::new(); + if let Some((dtb_base, dtb_size)) = dtb { + let data = unsafe { slice::from_raw_parts(dtb_base as *const u8, dtb_size) }; + binary.extend(data); + }; + binary + }); + if !initialized { + println!("DTB_BINARY INIT TWICE!"); + } +} + +pub fn travel_interrupt_ctrl(fdt: &Fdt) { + if let Some(root_intr_parent) = fdt + .root() + .property("interrupt-parent") + .and_then(NodeProperty::as_usize) + { + debug!("root parent = 0x{:08x}", root_intr_parent); + } + for node in fdt.all_nodes() { + if node.property("interrupt-controller").is_some() { + let compatible = node.property("compatible").unwrap().as_str().unwrap(); + let phandle = node.property("phandle").unwrap().as_usize().unwrap(); + let intr_cells = node.interrupt_cells().unwrap(); + let _intr = node + .property("interrupt-parent") + .and_then(NodeProperty::as_usize); + let _intr_data = node.property("interrupts"); + + debug!( + "{}, compatible = {}, #interrupt-cells = 0x{:08x}, phandle = 0x{:08x}", + node.name, compatible, intr_cells, phandle + ); + if let Some(intr) = _intr { + if let Some(intr_data) = _intr_data { + debug!("interrupt-parent = 0x{:08x}", intr); + debug!("interrupts begin:"); + for chunk in intr_data.value.chunks(4) { + debug!("0x{:08x}, ", BE::read_u32(chunk)); + } + debug!("interrupts end"); + } + } + } + } +} + +#[allow(unused)] +pub fn register_memory_ranges(dt: &Fdt) { + for chunk in dt.memory().regions() { + if let Some(size) = chunk.size { + register_memory_region( + chunk.starting_address as usize, + size, + BootloaderMemoryKind::Free, + ); + } + } +} + +pub fn register_dev_memory_ranges(dt: &Fdt) { + if cfg!(target_arch = "aarch64") { + // work around for qemu-arm64 + // dev mem: 128MB - 1GB, see https://github.com/qemu/qemu/blob/master/hw/arm/virt.c for details + let root_node = dt.root(); + let is_qemu_virt = root_node.model().contains("linux,dummy-virt"); + + if is_qemu_virt { + register_memory_region(0x08000000, 0x08000000, BootloaderMemoryKind::Device); + register_memory_region(0x10000000, 0x30000000, BootloaderMemoryKind::Device); + return; + } + } + + let Some(soc_node) = dt.find_node("/soc") else { + log::warn!("failed to find /soc in devicetree"); + return; + }; + let Some(reg) = soc_node.ranges() else { + log::warn!("devicetree /soc has no ranges"); + return; + }; + for chunk in reg { + log::debug!( + "dev mem 0x{:08x} 0x{:08x} 0x{:08x} 0x{:08x}", + chunk.child_bus_address_hi, + chunk.child_bus_address, + chunk.parent_bus_address, + chunk.size + ); + + register_memory_region( + chunk.parent_bus_address, + chunk.size, + BootloaderMemoryKind::Device, + ); + } + + // also add all soc-internal devices because they might not be shown in ranges + // 
(identity-mapped soc bus may have empty ranges) + for device in soc_node.children() { + if let Some(reg) = device.reg() { + for entry in reg { + if let Some(size) = entry.size { + let addr = entry.starting_address as usize; + if let Some(mapped_addr) = get_mmio_address(dt, &device, &entry) { + debug!( + "soc device {} 0x{:08x} -> 0x{:08x} size 0x{:08x}", + device.name, addr, mapped_addr, size + ); + register_memory_region(mapped_addr, size, BootloaderMemoryKind::Device); + } + } + } + } + } +} + +pub fn get_mmio_address(fdt: &Fdt, _device: &FdtNode, region: &MemoryRegion) -> Option { + /* DT spec 2.3.8 "ranges": + * The ranges property provides a means of defining a mapping or translation between + * the address space of the bus (the child address space) and the address space of the bus + * node’s parent (the parent address space). + * If the property is defined with an value, it specifies that the parent and child + * address space is identical, and no address translation is required. + * If the property is not present in a bus node, it is assumed that no mapping exists between + * children of the node and the parent address space. + */ + + // FIXME assumes all the devices are connected to CPUs via the /soc bus + let mut mapped_addr = region.starting_address as usize; + let size = region.size.unwrap_or(0).saturating_sub(1); + let last_address = mapped_addr.saturating_add(size); + if let Some(parent) = fdt.find_node("/soc") { + let mut ranges = parent.ranges().map(|f| f.peekable())?; + if ranges.peek().is_some() { + let parent_range = ranges.find(|x| { + x.child_bus_address <= mapped_addr && last_address - x.child_bus_address <= x.size + })?; + mapped_addr = parent_range + .parent_bus_address + .checked_add(mapped_addr - parent_range.child_bus_address)?; + let _ = mapped_addr.checked_add(size)?; + } + } + Some(mapped_addr) +} + +pub fn interrupt_parent<'a>(fdt: &'a Fdt, node: &'a FdtNode) -> Option> { + // FIXME traverse device tree up + node.interrupt_parent() + .or_else(|| fdt.find_node("/soc").and_then(|soc| soc.interrupt_parent())) + .or_else(|| fdt.find_node("/").and_then(|node| node.interrupt_parent())) +} + +pub fn get_interrupt(fdt: &Fdt, node: &FdtNode, idx: usize) -> Option { + let interrupts = node.property("interrupts").unwrap(); + let parent_interrupt_cells = interrupt_parent(fdt, node) + .unwrap() + .interrupt_cells() + .unwrap(); + let mut intr = interrupts + .value + .array_chunks::<4>() + .map(|f| BE::read_u32(f)) + .skip(parent_interrupt_cells * idx); + match parent_interrupt_cells { + 1 => Some(IrqCell::L1(intr.next()?)), + 2 if let Ok([a, b]) = intr.next_chunk() => Some(IrqCell::L2(a, b)), + 3 if let Ok([a, b, c]) = intr.next_chunk() => Some(IrqCell::L3(a, b, c)), + _ => None, + } +} + +pub fn diag_uart_range<'a>(dtb: &'a Fdt) -> Option<(usize, usize, bool, bool, &'a str)> { + let stdout_path = dtb.chosen().stdout()?; + let uart_node = stdout_path.node(); + let skip_init = uart_node.property("skip-init").is_some(); + let cts_event_walkaround = uart_node.property("cts-event-walkaround").is_some(); + let compatible = uart_node + .property("compatible") + .and_then(NodeProperty::as_str)?; + + let mut reg = uart_node.reg()?; + let memory = reg.nth(0)?; + let address = get_mmio_address(dtb, &uart_node, &memory)?; + + Some(( + address, + memory.size?, + skip_init, + cts_event_walkaround, + compatible, + )) +} + +#[allow(unused)] +pub fn fill_env_data(dt: &Fdt, env_base: usize) -> usize { + if let Some(bootargs) = dt.chosen().bootargs() { + let bootargs_len = bootargs.len(); + 
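// Illustrative sketch of the devicetree "ranges" translation performed by get_mmio_address
// above: a child-bus address that falls inside a ranges entry is offset into the parent-bus
// window. Simplified model with hypothetical addresses, not taken from any real devicetree:
struct BusRange {
    child: usize,
    parent: usize,
    size: usize,
}

fn translate(addr: usize, ranges: &[BusRange]) -> Option<usize> {
    // Find the entry whose child window contains `addr`, then rebase it onto the parent bus.
    let r = ranges
        .iter()
        .find(|r| addr >= r.child && addr - r.child < r.size)?;
    Some(r.parent + (addr - r.child))
}
// e.g. translate(0x1000_0000, &[BusRange { child: 0, parent: 0x30_0000_0000, size: 0x4000_0000 }])
//      returns Some(0x30_1000_0000).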
+ let env_base_slice = + unsafe { slice::from_raw_parts_mut(env_base as *mut u8, bootargs_len) }; + env_base_slice[..bootargs_len].clone_from_slice(bootargs.as_bytes()); + + bootargs_len + } else { + 0 + } +} diff --git a/src/elf.rs b/src/elf.rs index debc08d8..e52cfc2d 100644 --- a/src/elf.rs +++ b/src/elf.rs @@ -1,3 +1,5 @@ +#![allow(unused)] + //! ELF executables use alloc::string::String; @@ -7,28 +9,44 @@ use goblin::elf::section_header::SHT_SYMTAB; #[cfg(target_arch = "x86")] pub use goblin::elf32::{header, program_header, section_header, sym}; -#[cfg(target_arch = "x86_64")] +#[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "x86_64" +))] pub use goblin::elf64::{header, program_header, section_header, sym}; /// An ELF executable pub struct Elf<'a> { pub data: &'a [u8], - header: &'a header::Header + header: &'a header::Header, } impl<'a> Elf<'a> { /// Create a ELF executable from data pub fn from(data: &'a [u8]) -> Result, String> { if data.len() < header::SIZEOF_EHDR { - Err(format!("Elf: Not enough data: {} < {}", data.len(), header::SIZEOF_EHDR)) + Err(format!( + "Elf: Not enough data: {} < {}", + data.len(), + header::SIZEOF_EHDR + )) } else if &data[..header::SELFMAG] != header::ELFMAG { - Err(format!("Elf: Invalid magic: {:?} != {:?}", &data[..header::SELFMAG], header::ELFMAG)) + Err(format!( + "Elf: Invalid magic: {:?} != {:?}", + &data[..header::SELFMAG], + header::ELFMAG + )) } else if data.get(header::EI_CLASS) != Some(&header::ELFCLASS) { - Err(format!("Elf: Invalid architecture: {:?} != {:?}", data.get(header::EI_CLASS), header::ELFCLASS)) + Err(format!( + "Elf: Invalid architecture: {:?} != {:?}", + data.get(header::EI_CLASS), + header::ELFCLASS + )) } else { Ok(Elf { - data: data, - header: unsafe { &*(data.as_ptr() as usize as *const header::Header) } + data, + header: unsafe { &*(data.as_ptr() as usize as *const header::Header) }, }) } } @@ -37,15 +55,7 @@ impl<'a> Elf<'a> { ElfSections { data: self.data, header: self.header, - i: 0 - } - } - - pub fn segments(&'a self) -> ElfSegments<'a> { - ElfSegments { - data: self.data, - header: self.header, - i: 0 + i: 0, } } @@ -61,24 +71,19 @@ impl<'a> Elf<'a> { if let Some(symtab) = symtab_opt { Some(ElfSymbols { data: self.data, - symtab: symtab, - i: 0 + symtab, + i: 0, }) } else { None } } - - /// Get the entry field of the header - pub fn entry(&self) -> usize { - self.header.e_entry as usize - } } pub struct ElfSections<'a> { data: &'a [u8], header: &'a header::Header, - i: usize + i: usize, } impl<'a> Iterator for ElfSections<'a> { @@ -86,11 +91,10 @@ impl<'a> Iterator for ElfSections<'a> { fn next(&mut self) -> Option { if self.i < self.header.e_shnum as usize { let item = unsafe { - &* (( - self.data.as_ptr() as usize - + self.header.e_shoff as usize - + self.i * self.header.e_shentsize as usize - ) as *const section_header::SectionHeader) + &*((self.data.as_ptr() as usize + + self.header.e_shoff as usize + + self.i * self.header.e_shentsize as usize) + as *const section_header::SectionHeader) }; self.i += 1; Some(item) @@ -103,7 +107,7 @@ impl<'a> Iterator for ElfSections<'a> { pub struct ElfSegments<'a> { data: &'a [u8], header: &'a header::Header, - i: usize + i: usize, } impl<'a> Iterator for ElfSegments<'a> { @@ -111,11 +115,10 @@ impl<'a> Iterator for ElfSegments<'a> { fn next(&mut self) -> Option { if self.i < self.header.e_phnum as usize { let item = unsafe { - &* (( - self.data.as_ptr() as usize - + self.header.e_phoff as usize - + self.i * self.header.e_phentsize as usize - ) 
as *const program_header::ProgramHeader) + &*((self.data.as_ptr() as usize + + self.header.e_phoff as usize + + self.i * self.header.e_phentsize as usize) + as *const program_header::ProgramHeader) }; self.i += 1; Some(item) @@ -128,7 +131,7 @@ impl<'a> Iterator for ElfSegments<'a> { pub struct ElfSymbols<'a> { data: &'a [u8], symtab: &'a section_header::SectionHeader, - i: usize + i: usize, } impl<'a> Iterator for ElfSymbols<'a> { @@ -136,11 +139,9 @@ impl<'a> Iterator for ElfSymbols<'a> { fn next(&mut self) -> Option { if self.i < (self.symtab.sh_size as usize) / sym::SIZEOF_SYM { let item = unsafe { - &* (( - self.data.as_ptr() as usize - + self.symtab.sh_offset as usize - + self.i * sym::SIZEOF_SYM - ) as *const sym::Sym) + &*((self.data.as_ptr() as usize + + self.symtab.sh_offset as usize + + self.i * sym::SIZEOF_SYM) as *const sym::Sym) }; self.i += 1; Some(item) diff --git a/src/event.rs b/src/event.rs index 7a7d25e0..67274010 100644 --- a/src/event.rs +++ b/src/event.rs @@ -1,13 +1,19 @@ use alloc::sync::Arc; -use alloc::collections::BTreeMap; use core::sync::atomic::{AtomicUsize, Ordering}; +use hashbrown::HashMap; use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; -use crate::context; -use crate::scheme::{self, SchemeId}; -use crate::sync::WaitQueue; -use crate::syscall::data::Event; -use crate::syscall::error::{Error, Result, EBADF, EINTR, ESRCH}; +use crate::{ + context, + scheme::{self, SchemeId}, + sync::WaitQueue, + syscall::{ + data::Event, + error::{Error, Result, EBADF}, + flag::EventFlags, + usercopy::UserSliceWo, + }, +}; int_like!(EventQueueId, AtomicEventQueueId, usize, AtomicUsize); @@ -19,25 +25,25 @@ pub struct EventQueue { impl EventQueue { pub fn new(id: EventQueueId) -> EventQueue { EventQueue { - id: id, - queue: WaitQueue::new() + id, + queue: WaitQueue::new(), } } - pub fn read(&self, events: &mut [Event]) -> Result { - self.queue.receive_into(events, true).ok_or(Error::new(EINTR)) + pub fn read(&self, buf: UserSliceWo, block: bool) -> Result { + self.queue.receive_into_user(buf, block, "EventQueue::read") } pub fn write(&self, events: &[Event]) -> Result { for event in events { let file = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - let files = context.files.lock(); + let context_ref = context::current(); + let context = context_ref.read(); + + let files = context.files.read(); match files.get(event.id).ok_or(Error::new(EBADF))? 
{ Some(file) => file.clone(), - None => return Err(Error::new(EBADF)) + None => return Err(Error::new(EBADF)), } }; @@ -48,12 +54,16 @@ impl EventQueue { register( RegKey { scheme, number }, - QueueKey { queue: self.id, id: event.id, data: event.data }, - event.flags + QueueKey { + queue: self.id, + id: event.id, + data: event.data, + }, + event.flags, ); let flags = sync(RegKey { scheme, number })?; - if flags > 0 { + if !flags.is_empty() { trigger(scheme, number, flags); } } @@ -62,7 +72,7 @@ impl EventQueue { } } -pub type EventQueueList = BTreeMap>; +pub type EventQueueList = HashMap>; // Next queue id static NEXT_QUEUE_ID: AtomicUsize = AtomicUsize::new(0); @@ -77,7 +87,7 @@ static QUEUES: Once> = Once::new(); /// Initialize queues, called if needed fn init_queues() -> RwLock { - RwLock::new(BTreeMap::new()) + RwLock::new(HashMap::new()) } /// Get the event queues list, const @@ -90,20 +100,20 @@ pub fn queues_mut() -> RwLockWriteGuard<'static, EventQueueList> { QUEUES.call_once(init_queues).write() } -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct RegKey { pub scheme: SchemeId, pub number: usize, } -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct QueueKey { pub queue: EventQueueId, pub id: usize, - pub data: usize + pub data: usize, } -type Registry = BTreeMap>; +type Registry = HashMap>; static REGISTRY: Once> = Once::new(); @@ -122,38 +132,35 @@ pub fn registry_mut() -> RwLockWriteGuard<'static, Registry> { REGISTRY.call_once(init_registry).write() } -pub fn register(reg_key: RegKey, queue_key: QueueKey, flags: usize) { +pub fn register(reg_key: RegKey, queue_key: QueueKey, flags: EventFlags) { let mut registry = registry_mut(); - let entry = registry.entry(reg_key).or_insert_with(|| { - BTreeMap::new() - }); + let entry = registry.entry(reg_key).or_insert_with(|| HashMap::new()); - if flags == 0 { + if flags.is_empty() { entry.remove(&queue_key); } else { entry.insert(queue_key, flags); } } -pub fn sync(reg_key: RegKey) -> Result { - let mut flags = 0; +pub fn sync(reg_key: RegKey) -> Result { + let mut flags = EventFlags::empty(); { let registry = registry(); if let Some(queue_list) = registry.get(®_key) { - for (_queue_key, queue_flags) in queue_list.iter() { + for (_queue_key, &queue_flags) in queue_list.iter() { flags |= queue_flags; } } } - let scheme = { - let schemes = scheme::schemes(); - let scheme = schemes.get(reg_key.scheme).ok_or(Error::new(EBADF))?; - Arc::clone(&scheme) - }; + let scheme = scheme::schemes() + .get(reg_key.scheme) + .ok_or(Error::new(EBADF))? 
+ .clone(); scheme.fevent(reg_key.number, flags) } @@ -169,19 +176,19 @@ pub fn unregister_file(scheme: SchemeId, number: usize) { // // } -pub fn trigger(scheme: SchemeId, number: usize, flags: usize) { +pub fn trigger(scheme: SchemeId, number: usize, flags: EventFlags) { let registry = registry(); if let Some(queue_list) = registry.get(&RegKey { scheme, number }) { - for (queue_key, queue_flags) in queue_list.iter() { + for (queue_key, &queue_flags) in queue_list.iter() { let common_flags = flags & queue_flags; - if common_flags != 0 { + if !common_flags.is_empty() { let queues = queues(); if let Some(queue) = queues.get(&queue_key.queue) { queue.queue.send(Event { id: queue_key.id, flags: common_flags, - data: queue_key.data + data: queue_key.data, }); } } diff --git a/src/externs.rs b/src/externs.rs index 4968e3c9..42f111dc 100644 --- a/src/externs.rs +++ b/src/externs.rs @@ -9,23 +9,34 @@ const WORD_SIZE: usize = mem::size_of::(); /// This faster implementation works by copying bytes not one-by-one, but in /// groups of 8 bytes (or 4 bytes in the case of 32-bit architectures). #[no_mangle] -pub unsafe extern fn memcpy(dest: *mut u8, src: *const u8, - n: usize) -> *mut u8 { - - let n_usize: usize = n/WORD_SIZE; // Number of word sized groups - let mut i: usize = 0; - - // Copy `WORD_SIZE` bytes at a time - let n_fast = n_usize*WORD_SIZE; - while i < n_fast { - *((dest as usize + i) as *mut usize) = - *((src as usize + i) as *const usize); +pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, len: usize) -> *mut u8 { + // TODO: Alignment? Some sources claim that even on relatively modern µ-arches, unaligned + // accesses spanning two pages, can take dozens of cycles. That means chunk-based memcpy can + // even be slower for small lengths if alignment is not taken into account. + // + // TODO: Optimize out smaller loops by first checking if len < WORD_SIZE, and possibly if + // dest + WORD_SIZE spans two pages, then doing one unaligned copy, then aligning up, and then + // doing one last unaligned copy? + // + // TODO: While we use the -fno-builtin equivalent, can we guarantee LLVM won't insert memcpy + // call inside here? Maybe write it in assembly? + + let mut i = 0_usize; + + // First we copy len / WORD_SIZE chunks... + + let chunks = len / WORD_SIZE; + + while i < chunks * WORD_SIZE { + dest.add(i) + .cast::() + .write_unaligned(src.add(i).cast::().read_unaligned()); i += WORD_SIZE; } - // Copy 1 byte at a time - while i < n { - *((dest as usize + i) as *mut u8) = *((src as usize + i) as *const u8); + // .. then we copy len % WORD_SIZE bytes + while i < len { + dest.add(i).write(src.add(i).read()); i += 1; } @@ -39,43 +50,42 @@ pub unsafe extern fn memcpy(dest: *mut u8, src: *const u8, /// This faster implementation works by copying bytes not one-by-one, but in /// groups of 8 bytes (or 4 bytes in the case of 32-bit architectures). #[no_mangle] -pub unsafe extern fn memmove(dest: *mut u8, src: *const u8, - n: usize) -> *mut u8 { +pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, len: usize) -> *mut u8 { + let chunks = len / WORD_SIZE; + + // TODO: also require dest - src < len before choosing to copy backwards? if src < dest as *const u8 { - let n_usize: usize = n/WORD_SIZE; // Number of word sized groups - let mut i: usize = n_usize*WORD_SIZE; + // We have to copy backwards if copying upwards. 
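        // Concretely (illustrative values, not from this patch): with src = 0x1000,
        // dest = 0x1002 and len = 4, a forward copy would overwrite the bytes at
        // 0x1002..0x1004 before they are read as source bytes; walking from the end
        // downwards reads every overlapping source byte before it can be clobbered.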
- // Copy `WORD_SIZE` bytes at a time - while i != 0 { - i -= WORD_SIZE; - *((dest as usize + i) as *mut usize) = - *((src as usize + i) as *const usize); + let mut i = len; + + while i != chunks * WORD_SIZE { + i -= 1; + dest.add(i).write(src.add(i).read()); } - let mut i: usize = n; + while i > 0 { + i -= WORD_SIZE; - // Copy 1 byte at a time - while i != n_usize*WORD_SIZE { - i -= 1; - *((dest as usize + i) as *mut u8) = - *((src as usize + i) as *const u8); + dest.add(i) + .cast::() + .write_unaligned(src.add(i).cast::().read_unaligned()); } } else { - let n_usize: usize = n/WORD_SIZE; // Number of word sized groups - let mut i: usize = 0; - - // Copy `WORD_SIZE` bytes at a time - let n_fast = n_usize*WORD_SIZE; - while i < n_fast { - *((dest as usize + i) as *mut usize) = - *((src as usize + i) as *const usize); + // We have to copy forward if copying downwards. + + let mut i = 0_usize; + + while i < chunks * WORD_SIZE { + dest.add(i) + .cast::() + .write_unaligned(src.add(i).cast::().read_unaligned()); + i += WORD_SIZE; } - // Copy 1 byte at a time - while i < n { - *((dest as usize + i) as *mut u8) = - *((src as usize + i) as *const u8); + while i < len { + dest.add(i).write(src.add(i).read()); i += 1; } } @@ -90,23 +100,21 @@ pub unsafe extern fn memmove(dest: *mut u8, src: *const u8, /// This faster implementation works by setting bytes not one-by-one, but in /// groups of 8 bytes (or 4 bytes in the case of 32-bit architectures). #[no_mangle] -pub unsafe extern fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8 { - let c: usize = mem::transmute([c as u8; WORD_SIZE]); - let n_usize: usize = n/WORD_SIZE; - let mut i: usize = 0; - - // Set `WORD_SIZE` bytes at a time - let n_fast = n_usize*WORD_SIZE; - while i < n_fast { - *((dest as usize + i) as *mut usize) = c; +pub unsafe extern "C" fn memset(dest: *mut u8, byte: i32, len: usize) -> *mut u8 { + let byte = byte as u8; + + let mut i = 0; + + let broadcasted = usize::from_ne_bytes([byte; WORD_SIZE]); + let chunks = len / WORD_SIZE; + + while i < chunks * WORD_SIZE { + dest.add(i).cast::().write_unaligned(broadcasted); i += WORD_SIZE; } - let c = c as u8; - - // Set 1 byte at a time - while i < n { - *((dest as usize + i) as *mut u8) = c; + while i < len { + dest.add(i).write(byte); i += 1; } @@ -120,34 +128,34 @@ pub unsafe extern fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8 { /// This faster implementation works by comparing bytes not one-by-one, but in /// groups of 8 bytes (or 4 bytes in the case of 32-bit architectures). #[no_mangle] -pub unsafe extern fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { - let n_usize: usize = n/WORD_SIZE; - let mut i: usize = 0; - - let n_fast = n_usize*WORD_SIZE; - while i < n_fast { - let a = *((s1 as usize + i) as *const usize); - let b = *((s2 as usize + i) as *const usize); +pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, len: usize) -> i32 { + let mut i = 0_usize; + + // First compare WORD_SIZE chunks... + let chunks = len / WORD_SIZE; + + while i < chunks * WORD_SIZE { + let a = s1.add(i).cast::().read_unaligned(); + let b = s2.add(i).cast::().read_unaligned(); + if a != b { - let n: usize = i + WORD_SIZE; - // Find the one byte that is not equal - while i < n { - let a = *((s1 as usize + i) as *const u8); - let b = *((s2 as usize + i) as *const u8); - if a != b { - return a as i32 - b as i32; - } - i += 1; - } + // x86 has had bswap since the 80486, and the compiler will likely use the faster + // movbe. 
AArch64 has the REV instruction, which I think is universally available. + let diff = usize::from_be(a).wrapping_sub(usize::from_be(b)) as isize; + + // TODO: If chunk size == 32 bits, diff can be returned directly. + return diff.signum() as i32; } i += WORD_SIZE; } - while i < n { - let a = *((s1 as usize + i) as *const u8); - let b = *((s2 as usize + i) as *const u8); + // ... and then compare bytes. + while i < len { + let a = s1.add(i).read(); + let b = s2.add(i).read(); + if a != b { - return a as i32 - b as i32; + return i32::from(a) - i32::from(b); } i += 1; } diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 24b265af..00000000 --- a/src/lib.rs +++ /dev/null @@ -1,252 +0,0 @@ -//! # The Redox OS Kernel, version 2 -//! -//! The Redox OS Kernel is a microkernel that supports `x86_64` systems and -//! provides Unix-like syscalls for primarily Rust applications - -//#![deny(warnings)] -#![cfg_attr(feature = "clippy", allow(if_same_then_else))] -#![cfg_attr(feature = "clippy", allow(inline_always))] -#![cfg_attr(feature = "clippy", allow(many_single_char_names))] -#![cfg_attr(feature = "clippy", allow(module_inception))] -#![cfg_attr(feature = "clippy", allow(new_without_default))] -#![cfg_attr(feature = "clippy", allow(not_unsafe_ptr_arg_deref))] -#![cfg_attr(feature = "clippy", allow(or_fun_call))] -#![cfg_attr(feature = "clippy", allow(too_many_arguments))] -#![deny(unreachable_patterns)] -#![feature(alloc)] -#![feature(allocator_api)] -#![feature(asm)] -#![feature(concat_idents)] -#![feature(const_fn)] -#![feature(core_intrinsics)] -#![feature(integer_atomics)] -#![feature(lang_items)] -#![feature(naked_functions)] -#![feature(never_type)] -#![feature(ptr_internals)] -#![feature(thread_local)] -#![no_std] - -pub extern crate x86; - -#[macro_use] -extern crate alloc; - -#[macro_use] -extern crate bitflags; -extern crate goblin; -extern crate linked_list_allocator; -extern crate rustc_demangle; -extern crate spin; -#[cfg(feature = "slab")] -extern crate slab_allocator; - -use alloc::vec::Vec; -use core::sync::atomic::{AtomicUsize, Ordering}; - -use crate::scheme::{FileHandle, SchemeNamespace}; - -pub use crate::consts::*; - -#[macro_use] -/// Shared data structures -pub mod common; - -/// Architecture-dependent stuff -#[macro_use] -pub mod arch; -pub use crate::arch::*; - -/// Constants like memory locations -pub mod consts; - -/// Heap allocators -pub mod allocator; - -/// ACPI table parsing -#[cfg(feature = "acpi")] -mod acpi; - -/// Context management -pub mod context; - -/// Architecture-independent devices -pub mod devices; - -/// ELF file parsing -#[cfg(not(feature="doc"))] -pub mod elf; - -/// Event handling -pub mod event; - -/// External functions -pub mod externs; - -/// Logging -pub mod log; - -/// Memory management -pub mod memory; - -/// Panic -#[cfg(not(any(feature="doc", test)))] -pub mod panic; - -/// Process tracing -pub mod ptrace; - -/// Schemes, filesystem handlers -pub mod scheme; - -/// Synchronization primitives -pub mod sync; - -/// Syscall handlers -pub mod syscall; - -/// Time -pub mod time; - -/// Tests -#[cfg(test)] -pub mod tests; - -#[global_allocator] -static ALLOCATOR: allocator::Allocator = allocator::Allocator; - -/// A unique number that identifies the current CPU - used for scheduling -#[thread_local] -static CPU_ID: AtomicUsize = AtomicUsize::new(0); - -/// Get the current CPU's scheduling ID -#[inline(always)] -pub fn cpu_id() -> usize { - CPU_ID.load(Ordering::Relaxed) -} - -/// The count of all CPUs that can have work scheduled 
-static CPU_COUNT : AtomicUsize = AtomicUsize::new(0); - -/// Get the number of CPUs currently active -#[inline(always)] -pub fn cpu_count() -> usize { - CPU_COUNT.load(Ordering::Relaxed) -} - -static mut INIT_ENV: &[u8] = &[]; - -/// Initialize userspace by running the initfs:bin/init process -/// This function will also set the CWD to initfs:bin and open debug: as stdio -pub extern fn userspace_init() { - let path = b"initfs:/bin/init"; - let env = unsafe { INIT_ENV }; - - assert_eq!(syscall::chdir(b"initfs:"), Ok(0)); - - assert_eq!(syscall::open(b"debug:", syscall::flag::O_RDONLY).map(FileHandle::into), Ok(0)); - assert_eq!(syscall::open(b"debug:", syscall::flag::O_WRONLY).map(FileHandle::into), Ok(1)); - assert_eq!(syscall::open(b"debug:", syscall::flag::O_WRONLY).map(FileHandle::into), Ok(2)); - - let fd = syscall::open(path, syscall::flag::O_RDONLY).expect("failed to open init"); - - let mut args = Vec::new(); - args.push(path.to_vec().into_boxed_slice()); - - let mut vars = Vec::new(); - for var in env.split(|b| *b == b'\n') { - if ! var.is_empty() { - vars.push(var.to_vec().into_boxed_slice()); - } - } - - syscall::fexec_kernel(fd, args.into_boxed_slice(), vars.into_boxed_slice(), None).expect("failed to execute init"); - - panic!("init returned"); -} - -/// This is the kernel entry point for the primary CPU. The arch crate is responsible for calling this -pub fn kmain(cpus: usize, env: &'static [u8]) -> ! { - CPU_ID.store(0, Ordering::SeqCst); - CPU_COUNT.store(cpus, Ordering::SeqCst); - unsafe { INIT_ENV = env }; - - //Initialize the first context, stored in kernel/src/context/mod.rs - context::init(); - - let pid = syscall::getpid(); - println!("BSP: {:?} {}", pid, cpus); - println!("Env: {:?}", ::core::str::from_utf8(env)); - - match context::contexts_mut().spawn(userspace_init) { - Ok(context_lock) => { - let mut context = context_lock.write(); - context.rns = SchemeNamespace::from(1); - context.ens = SchemeNamespace::from(1); - context.status = context::Status::Runnable; - }, - Err(err) => { - panic!("failed to spawn userspace_init: {:?}", err); - } - } - - loop { - unsafe { - interrupt::disable(); - if context::switch() { - interrupt::enable_and_nop(); - } else { - // Enable interrupts, then halt CPU (to save power) until the next interrupt is actually fired. - interrupt::enable_and_halt(); - } - } - } -} - -/// This is the main kernel entry point for secondary CPUs -#[allow(unreachable_code, unused_variables)] -pub fn kmain_ap(id: usize) -> ! { - CPU_ID.store(id, Ordering::SeqCst); - - if cfg!(feature = "multi_core") { - context::init(); - - let pid = syscall::getpid(); - println!("AP {}: {:?}", id, pid); - - loop { - unsafe { - interrupt::disable(); - if context::switch() { - interrupt::enable_and_nop(); - } else { - // Enable interrupts, then halt CPU (to save power) until the next interrupt is actually fired. 
- interrupt::enable_and_halt(); - } - } - } - } else { - println!("AP {}: Disabled", id); - - loop { - unsafe { - interrupt::disable(); - interrupt::halt(); - } - } - } -} - -/// Allow exception handlers to send signal to arch-independant kernel -#[no_mangle] -pub extern fn ksignal(signal: usize) { - println!("SIGNAL {}, CPU {}, PID {:?}", signal, cpu_id(), context::context_id()); - { - let contexts = context::contexts(); - if let Some(context_lock) = contexts.current() { - let context = context_lock.read(); - println!("NAME {}", unsafe { ::core::str::from_utf8_unchecked(&context.name.lock()) }); - } - } - syscall::exit(signal & 0x7F); -} diff --git a/src/log.rs b/src/log.rs index b0866cf3..de538960 100644 --- a/src/log.rs +++ b/src/log.rs @@ -1,5 +1,5 @@ use alloc::collections::VecDeque; -use spin::Mutex; +use spin::{Mutex, Once}; pub static LOG: Mutex> = Mutex::new(None); @@ -16,7 +16,7 @@ impl Log { pub fn new(size: usize) -> Log { Log { data: VecDeque::with_capacity(size), - size: size + size, } } @@ -33,3 +33,36 @@ impl Log { } } } + +struct RedoxLogger { + log_func: fn(&log::Record), +} + +impl ::log::Log for RedoxLogger { + fn enabled(&self, _: &log::Metadata<'_>) -> bool { + false + } + fn log(&self, record: &log::Record<'_>) { + (self.log_func)(record) + } + fn flush(&self) {} +} + +pub fn init_logger(log_func: fn(&log::Record)) { + let mut called = false; + let logger = LOGGER.call_once(|| { + ::log::set_max_level(::log::LevelFilter::Info); + called = true; + + RedoxLogger { log_func } + }); + if !called { + log::error!("Tried to reinitialize the logger, which is not possible. Ignoring.") + } + match ::log::set_logger(logger) { + Ok(_) => log::info!("Logger initialized."), + Err(e) => println!("Logger setup failed! error: {}", e), + } +} + +static LOGGER: Once = Once::new(); diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 00000000..c7488fb4 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,306 @@ +//! # The Redox OS Kernel, version 2 +//! +//! The Redox OS Kernel is a microkernel that supports `x86_64` systems and +//! provides Unix-like syscalls for primarily Rust applications + +// Necessary for alternative! macro. +#![allow(unexpected_cfgs)] +// Useful for adding comments about different branches +#![allow(clippy::if_same_then_else)] +// Useful in the syscall function +#![allow(clippy::many_single_char_names)] +// Used for context::context +#![allow(clippy::module_inception)] +// Not implementing default is sometimes useful in the case something has significant cost +// to allocate. If you implement default, it can be allocated without evidence using the +// ..Default::default() syntax. Not fun in kernel space +#![allow(clippy::new_without_default)] +// Used to make it nicer to return errors, for example, .ok_or(Error::new(ESRCH)) +#![allow(clippy::or_fun_call)] +// This is needed in some cases, like for syscall +#![allow(clippy::too_many_arguments)] +// There is no harm in this being done +#![allow(clippy::useless_format)] +// TODO: address ocurrances and then deny +#![warn(clippy::not_unsafe_ptr_arg_deref)] +// TODO: address ocurrances and then deny +#![warn(clippy::cast_ptr_alignment)] +// Indexing a slice can cause panics and that is something we always want to avoid +// in kernel code. 
Use .get and return an error instead +// TODO: address ocurrances and then deny +#![warn(clippy::indexing_slicing)] +// Overflows are very, very bad in kernel code as it may provide an attack vector for +// userspace applications, and it is only checked in debug builds +// TODO: address ocurrances and then deny +#![warn(clippy::integer_arithmetic)] +// Avoid panicking in the kernel without information about the panic. Use expect +// TODO: address ocurrances and then deny +#![warn(clippy::result_unwrap_used)] +// This is usually a serious issue - a missing import of a define where it is interpreted +// as a catch-all variable in a match, for example +#![deny(unreachable_patterns)] +// Ensure that all must_use results are used +#![deny(unused_must_use)] +#![feature(allocator_api)] +#![feature(core_intrinsics)] +#![allow(internal_features)] +#![feature(int_roundings)] +#![feature(iter_next_chunk)] +#![feature(let_chains)] +#![feature(naked_functions)] +#![feature(sync_unsafe_cell)] +#![feature(variant_count)] +#![cfg_attr(not(test), no_std)] +#![cfg_attr(not(test), no_main)] +#![feature(array_chunks)] +#![feature(if_let_guard)] +#![feature(iterator_try_collect)] +#[macro_use] +extern crate alloc; + +#[macro_use] +extern crate bitflags; + +use core::sync::atomic::{AtomicU32, Ordering}; + +use crate::{context::switch::SwitchResult, scheme::SchemeNamespace}; + +use crate::consts::*; + +#[macro_use] +/// Shared data structures +mod common; + +/// Architecture-dependent stuff +#[macro_use] +#[allow(dead_code)] // TODO +mod arch; +use crate::arch::*; + +/// Heap allocators +mod allocator; + +/// ACPI table parsing +#[cfg(feature = "acpi")] +#[allow(dead_code)] // TODO +mod acpi; + +#[cfg(dtb)] +mod dtb; + +/// Logical CPU ID and bitset types +mod cpu_set; + +/// Stats for the CPUs +#[cfg(feature = "sys_stat")] +mod cpu_stats; + +/// Context management +mod context; + +/// Debugger +#[cfg(feature = "debugger")] +mod debugger; + +/// Architecture-independent devices +mod devices; + +/// ELF file parsing +mod elf; + +/// Event handling +mod event; + +/// External functions +#[cfg(not(test))] +mod externs; + +/// Logging +mod log; +use ::log::info; +use alloc::sync::Arc; +use spinning_top::RwSpinlock; + +/// Memory management +mod memory; + +/// Panic +mod panic; + +mod percpu; + +/// Process tracing +mod ptrace; + +/// Performance profiling of the kernel +#[cfg(feature = "profiling")] +pub mod profiling; + +/// Schemes, filesystem handlers +mod scheme; + +/// Early init +mod startup; + +/// Synchronization primitives +mod sync; + +/// Syscall handlers +mod syscall; + +/// Time +mod time; + +#[cfg_attr(not(test), global_allocator)] +static ALLOCATOR: allocator::Allocator = allocator::Allocator; + +/// Get the current CPU's scheduling ID +#[inline(always)] +fn cpu_id() -> crate::cpu_set::LogicalCpuId { + crate::percpu::PercpuBlock::current().cpu_id +} + +/// The count of all CPUs that can have work scheduled +static CPU_COUNT: AtomicU32 = AtomicU32::new(0); + +/// Get the number of CPUs currently active +#[inline(always)] +fn cpu_count() -> u32 { + CPU_COUNT.load(Ordering::Relaxed) +} + +fn init_env() -> &'static [u8] { + crate::BOOTSTRAP.get().expect("BOOTSTRAP was not set").env +} + +extern "C" fn userspace_init() { + let bootstrap = crate::BOOTSTRAP.get().expect("BOOTSTRAP was not set"); + unsafe { crate::syscall::process::usermode_bootstrap(bootstrap) } +} + +struct Bootstrap { + base: crate::memory::Frame, + page_count: usize, + env: &'static [u8], +} +static BOOTSTRAP: spin::Once = 
spin::Once::new(); +static INIT_THREAD: spin::Once>> = spin::Once::new(); + +/// This is the kernel entry point for the primary CPU. The arch crate is responsible for calling this +fn kmain(cpu_count: u32, bootstrap: Bootstrap) -> ! { + CPU_COUNT.store(cpu_count, Ordering::SeqCst); + + //Initialize the first context, stored in kernel/src/context/mod.rs + context::init(); + + //Initialize global schemes, such as `acpi:`. + scheme::init_globals(); + + info!("BSP: {}", cpu_count); + info!("Env: {:?}", ::core::str::from_utf8(bootstrap.env)); + + BOOTSTRAP.call_once(|| bootstrap); + + #[cfg(feature = "profiling")] + profiling::ready_for_profiling(); + + let owner = None; // kmain not owned by any fd + match context::spawn(true, owner, userspace_init) { + Ok(context_lock) => { + { + let mut context = context_lock.write(); + context.status = context::Status::Runnable; + context.name.clear(); + context.name.push_str("[bootstrap]"); + + // TODO: Remove these from kernel + context.ens = SchemeNamespace::from(1); + context.euid = 0; + context.egid = 0; + } + INIT_THREAD.call_once(move || context_lock); + } + Err(err) => { + panic!("failed to spawn userspace_init: {:?}", err); + } + } + + run_userspace() +} + +/// This is the main kernel entry point for secondary CPUs +#[allow(unreachable_code, unused_variables, dead_code)] +fn kmain_ap(cpu_id: crate::cpu_set::LogicalCpuId) -> ! { + #[cfg(feature = "profiling")] + profiling::maybe_run_profiling_helper_forever(cpu_id); + + //TODO: workaround for bug where an AP on MeteorLake has cpu_id 0 + if !cfg!(feature = "multi_core") || cpu_id == crate::cpu_set::LogicalCpuId::BSP { + info!("AP {}: Disabled", cpu_id); + + loop { + unsafe { + interrupt::disable(); + interrupt::halt(); + } + } + } + context::init(); + + info!("AP {}", cpu_id); + + #[cfg(feature = "profiling")] + profiling::ready_for_profiling(); + + run_userspace(); +} +fn run_userspace() -> ! { + loop { + unsafe { + interrupt::disable(); + match context::switch() { + SwitchResult::Switched => { + interrupt::enable_and_nop(); + } + SwitchResult::AllContextsIdle => { + // Enable interrupts, then halt CPU (to save power) until the next interrupt is actually fired. + interrupt::enable_and_halt(); + } + } + } + } +} + +// TODO: Use this macro on aarch64 too. + +macro_rules! linker_offsets( + ($($name:ident),*) => { + $( + #[inline] + pub fn $name() -> usize { + extern "C" { + // TODO: UnsafeCell? + static $name: u8; + } + unsafe { &$name as *const u8 as usize } + } + )* + } +); +mod kernel_executable_offsets { + linker_offsets!( + __text_start, + __text_end, + __rodata_start, + __rodata_end, + __data_start, + __data_end, + __bss_start, + __bss_end, + __usercopy_start, + __usercopy_end + ); + + #[cfg(target_arch = "x86_64")] + linker_offsets!(__altrelocs_start, __altrelocs_end); +} diff --git a/src/memory/bump.rs b/src/memory/bump.rs deleted file mode 100644 index 5ffb4ad2..00000000 --- a/src/memory/bump.rs +++ /dev/null @@ -1,130 +0,0 @@ -//! # Bump frame allocator -//! 
Some code was borrowed from [Phil Opp's Blog](http://os.phil-opp.com/allocating-frames.html) - -use crate::paging::PhysicalAddress; - -use super::{Frame, FrameAllocator, MemoryArea, MemoryAreaIter}; - - -pub struct BumpAllocator { - next_free_frame: Frame, - current_area: Option<&'static MemoryArea>, - areas: MemoryAreaIter, - kernel_start: Frame, - kernel_end: Frame -} - -impl BumpAllocator { - pub fn new(kernel_start: usize, kernel_end: usize, memory_areas: MemoryAreaIter) -> Self { - let mut allocator = Self { - next_free_frame: Frame::containing_address(PhysicalAddress::new(0)), - current_area: None, - areas: memory_areas, - kernel_start: Frame::containing_address(PhysicalAddress::new(kernel_start)), - kernel_end: Frame::containing_address(PhysicalAddress::new(kernel_end)) - }; - allocator.choose_next_area(); - allocator - } - - fn choose_next_area(&mut self) { - self.current_area = self.areas.clone().filter(|area| { - let address = area.base_addr + area.length - 1; - Frame::containing_address(PhysicalAddress::new(address as usize)) >= self.next_free_frame - }).min_by_key(|area| area.base_addr); - - if let Some(area) = self.current_area { - let start_frame = Frame::containing_address(PhysicalAddress::new(area.base_addr as usize)); - if self.next_free_frame < start_frame { - self.next_free_frame = start_frame; - } - } - } -} - -impl FrameAllocator for BumpAllocator { - #[allow(unused)] - fn set_noncore(&mut self, noncore: bool) {} - - fn free_frames(&self) -> usize { - let mut count = 0; - - for area in self.areas.clone() { - let start_frame = Frame::containing_address(PhysicalAddress::new(area.base_addr as usize)); - let end_frame = Frame::containing_address(PhysicalAddress::new((area.base_addr + area.length - 1) as usize)); - for frame in Frame::range_inclusive(start_frame, end_frame) { - if frame >= self.kernel_start && frame <= self.kernel_end { - // Inside of kernel range - } else if frame >= self.next_free_frame { - // Frame is in free range - count += 1; - } else { - // Inside of used range - } - } - } - - count - } - - fn used_frames(&self) -> usize { - let mut count = 0; - - for area in self.areas.clone() { - let start_frame = Frame::containing_address(PhysicalAddress::new(area.base_addr as usize)); - let end_frame = Frame::containing_address(PhysicalAddress::new((area.base_addr + area.length - 1) as usize)); - for frame in Frame::range_inclusive(start_frame, end_frame) { - if frame >= self.kernel_start && frame <= self.kernel_end { - // Inside of kernel range - count += 1 - } else if frame >= self.next_free_frame { - // Frame is in free range - } else { - count += 1; - } - } - } - - count - } - - fn allocate_frames(&mut self, count: usize) -> Option { - if count == 0 { - None - } else if let Some(area) = self.current_area { - // "Clone" the frame to return it if it's free. Frame doesn't - // implement Clone, but we can construct an identical frame. 
- let start_frame = Frame{ number: self.next_free_frame.number }; - let end_frame = Frame { number: self.next_free_frame.number + (count - 1) }; - - // the last frame of the current area - let current_area_last_frame = { - let address = area.base_addr + area.length - 1; - Frame::containing_address(PhysicalAddress::new(address as usize)) - }; - - if end_frame > current_area_last_frame { - // all frames of current area are used, switch to next area - self.choose_next_area(); - } else if (start_frame >= self.kernel_start && start_frame <= self.kernel_end) - || (end_frame >= self.kernel_start && end_frame <= self.kernel_end) { - // `frame` is used by the kernel - self.next_free_frame = Frame { - number: self.kernel_end.number + 1 - }; - } else { - // frame is unused, increment `next_free_frame` and return it - self.next_free_frame.number += count; - return Some(start_frame); - } - // `frame` was not valid, try it again with the updated `next_free_frame` - self.allocate_frames(count) - } else { - None // no free frames left - } - } - - fn deallocate_frames(&mut self, _frame: Frame, _count: usize) { - //panic!("BumpAllocator::deallocate_frame: not supported: {:?}", frame); - } -} diff --git a/src/memory/kernel_mapper.rs b/src/memory/kernel_mapper.rs new file mode 100644 index 00000000..61a26e91 --- /dev/null +++ b/src/memory/kernel_mapper.rs @@ -0,0 +1,87 @@ +use crate::cpu_set::LogicalCpuId; +use core::sync::{ + atomic, + atomic::{AtomicUsize, Ordering}, +}; +use rmm::{PageMapper, TableKind}; + +const NO_PROCESSOR: usize = !0; +static LOCK_OWNER: AtomicUsize = AtomicUsize::new(NO_PROCESSOR); +static LOCK_COUNT: AtomicUsize = AtomicUsize::new(0); + +// TODO: Support, perhaps via const generics, embedding address checking in PageMapper, thereby +// statically enforcing that the kernel mapper can only map things in the kernel half, and vice +// versa. +/// A guard to the global lock protecting the upper 128 TiB of kernel address space. +/// +/// NOTE: Use this with great care! Since heap allocations may also require this lock when the heap +/// needs to be expended, it must not be held while memory allocations are done! +// TODO: Make the lock finer-grained so that e.g. the heap part can be independent from e.g. +// PHYS_PML4? 
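// A minimal usage sketch (assumed caller, not taken from this code):
//
//     let mut mapper = KernelMapper::lock();
//     match mapper.get_mut() {
//         Some(m) => { /* mutate kernel page tables through `m` */ }
//         None => { /* the lock was re-entered on this CPU, so only read access is safe */ }
//     }
//
// The global lock is released when the outermost guard on the owning CPU is dropped.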
+pub struct KernelMapper { + mapper: crate::paging::PageMapper, + ro: bool, +} +impl KernelMapper { + fn lock_inner(current_processor: usize) -> bool { + loop { + match LOCK_OWNER.compare_exchange_weak( + NO_PROCESSOR, + current_processor, + Ordering::Acquire, + Ordering::Relaxed, + ) { + Ok(_) => break, + // already owned by this hardware thread + Err(id) if id == current_processor => break, + // either CAS failed, or some other hardware thread holds the lock + Err(_) => core::hint::spin_loop(), + } + } + + let prev_count = LOCK_COUNT.fetch_add(1, Ordering::Relaxed); + atomic::compiler_fence(Ordering::Acquire); + + prev_count > 0 + } + pub unsafe fn lock_for_manual_mapper( + current_processor: LogicalCpuId, + mapper: crate::paging::PageMapper, + ) -> Self { + let ro = Self::lock_inner(current_processor.get() as usize); + Self { mapper, ro } + } + pub fn lock_manually(current_processor: LogicalCpuId) -> Self { + unsafe { + Self::lock_for_manual_mapper( + current_processor, + PageMapper::current(TableKind::Kernel, crate::memory::TheFrameAllocator), + ) + } + } + pub fn lock() -> Self { + Self::lock_manually(crate::cpu_id()) + } + pub fn get_mut(&mut self) -> Option<&mut crate::paging::PageMapper> { + if self.ro { + None + } else { + Some(&mut self.mapper) + } + } +} +impl core::ops::Deref for KernelMapper { + type Target = crate::paging::PageMapper; + + fn deref(&self) -> &Self::Target { + &self.mapper + } +} +impl Drop for KernelMapper { + fn drop(&mut self) { + if LOCK_COUNT.fetch_sub(1, Ordering::Relaxed) == 1 { + LOCK_OWNER.store(NO_PROCESSOR, Ordering::Release); + } + atomic::compiler_fence(Ordering::Release); + } +} diff --git a/src/memory/mod.rs b/src/memory/mod.rs index 46920b6e..bf294ed4 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -1,192 +1,1075 @@ //! # Memory management //! Some code was borrowed from [Phil Opp's Blog](http://os.phil-opp.com/allocating-frames.html) -pub use crate::paging::{PAGE_SIZE, PhysicalAddress}; +mod kernel_mapper; -use self::bump::BumpAllocator; -use self::recycle::RecycleAllocator; +use core::{ + cell::SyncUnsafeCell, + mem, + num::NonZeroUsize, + sync::atomic::{AtomicUsize, Ordering}, +}; +pub use kernel_mapper::KernelMapper; use spin::Mutex; -pub mod bump; -pub mod recycle; +pub use crate::paging::{PhysicalAddress, RmmA, RmmArch, PAGE_MASK, PAGE_SIZE}; +use crate::{ + context::{ + self, + memory::{AccessMode, PfError}, + }, + kernel_executable_offsets::{__usercopy_end, __usercopy_start}, + paging::{entry::EntryFlags, Page, PageFlags}, + syscall::error::{Error, ENOMEM}, +}; +use rmm::{BumpAllocator, FrameAllocator, FrameCount, FrameUsage, TableKind, VirtualAddress}; -/// The current memory map. It's size is maxed out to 512 entries, due to it being -/// from 0x500 to 0x5000 (800 is the absolute total) -static mut MEMORY_MAP: [MemoryArea; 512] = [MemoryArea { base_addr: 0, length: 0, _type: 0, acpi: 0 }; 512]; +/// Available physical memory areas +pub(crate) static AREAS: SyncUnsafeCell<[rmm::MemoryArea; 512]> = SyncUnsafeCell::new( + [rmm::MemoryArea { + base: PhysicalAddress::new(0), + size: 0, + }; 512], +); +pub(crate) static AREA_COUNT: SyncUnsafeCell = SyncUnsafeCell::new(0); -/// Memory does not exist -pub const MEMORY_AREA_NULL: u32 = 0; - -/// Memory is free to use -pub const MEMORY_AREA_FREE: u32 = 1; +// TODO: Share code +pub(crate) fn areas() -> &'static [rmm::MemoryArea] { + // SAFETY: Both AREAS and AREA_COUNT are initialized once and then never changed. + // + // TODO: Memory hotplug? 
+ unsafe { &(&*AREAS.get())[..AREA_COUNT.get().read().into()] } +} -/// Memory is reserved -pub const MEMORY_AREA_RESERVED: u32 = 2; +/// Get the number of frames available +pub fn free_frames() -> usize { + total_frames() - used_frames() +} -/// Memory is used by ACPI, and can be reclaimed -pub const MEMORY_AREA_ACPI: u32 = 3; +/// Get the number of frames used +pub fn used_frames() -> usize { + // TODO: Include bump allocator static pages? + FREELIST.lock().used_frames +} +pub fn total_frames() -> usize { + // TODO: Include bump allocator static pages? + sections().iter().map(|section| section.frames.len()).sum() +} -/// A memory map area -#[derive(Copy, Clone, Debug, Default)] -#[repr(packed)] -pub struct MemoryArea { - pub base_addr: u64, - pub length: u64, - pub _type: u32, - pub acpi: u32 +/// Allocate a range of frames +pub fn allocate_p2frame(order: u32) -> Option { + allocate_p2frame_complex(order, (), None, order).map(|(f, _)| f) } +pub fn allocate_frame() -> Option { + allocate_p2frame(0) +} +// TODO: Flags, strategy +pub fn allocate_p2frame_complex( + _req_order: u32, + _flags: (), + _strategy: Option<()>, + min_order: u32, +) -> Option<(Frame, usize)> { + let mut freelist = FREELIST.lock(); + + let Some((frame_order, frame)) = freelist + .for_orders + .iter() + .enumerate() + .skip(min_order as usize) + .find_map(|(i, f)| f.map(|f| (i as u32, f))) + else { + return None; + }; + + let info = get_page_info(frame) + .unwrap_or_else(|| panic!("no page info for allocated frame {frame:?}")) + .as_free() + .expect("freelist frames must not be marked used!"); + let next_free = info.next(); + //log::info!("FREE {frame:?} ORDER {frame_order} NEXT_FREE {next_free:?}"); + + debug_assert_eq!( + next_free.order(), + frame_order, + "{frame:?}->next {next_free:?}.order != {frame_order}" + ); + if let Some(next) = next_free.frame() { + let f = get_free_alloc_page_info(next); + debug_assert_eq!(f.prev().frame(), Some(frame)); + debug_assert_ne!(next, frame); + debug_assert!( + next.is_aligned_to_order(frame_order), + "NEXT {next:?} UNALIGNED" + ); + f.set_prev(P2Frame::new(None, frame_order)); + } + + debug_assert!(frame.is_aligned_to_order(frame_order)); + debug_assert_eq!(next_free.order(), frame_order); + freelist.for_orders[frame_order as usize] = next_free.frame(); -#[derive(Clone)] -pub struct MemoryAreaIter { - _type: u32, - i: usize + // TODO: Is this LIFO cache optimal? 
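    // Worked example (illustrative): if only an order-3 block (8 pages) is free and the
    // caller asked for min_order = 0, the loop below peels off the upper order-2 half
    // (pages 4..8), then an order-1 block (pages 2..4), then an order-0 page (page 1),
    // pushing each onto its own freelist, and the caller keeps page 0.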
+ //log::info!("MIN{min_order}FRAMEORD{frame_order}"); + for order in (min_order..frame_order).rev() { + //log::info!("SPLIT ORDER {order}"); + let order_page_count = 1 << order; + + let hi = frame.next_by(order_page_count); + //log::info!("SPLIT INTO {frame:?}:{hi:?} ORDER {order}"); + + debug_assert_eq!(freelist.for_orders[order as usize], None); + + let hi_info = get_page_info(hi) + .expect("sub-p2frame of split p2flame lacked PageInfo") + .make_free(order); + debug_assert!(!hi.is_aligned_to_order(frame_order)); + debug_assert!(hi.is_aligned_to_order(order)); + hi_info.set_next(P2Frame::new(None, order)); + hi_info.set_prev(P2Frame::new(None, order)); + freelist.for_orders[order as usize] = Some(hi); + } + + freelist.used_frames += 1 << min_order; + + info.mark_used(); + drop(freelist); + + unsafe { + (RmmA::phys_to_virt(frame.base()).data() as *mut u8).write_bytes(0, PAGE_SIZE << min_order); + } + + debug_assert!(frame.base().data() >= unsafe { ALLOCATOR_DATA.abs_off }); + + Some((frame, PAGE_SIZE << min_order)) } -impl MemoryAreaIter { - fn new(_type: u32) -> Self { - MemoryAreaIter { - _type: _type, - i: 0 +pub unsafe fn deallocate_p2frame(orig_frame: Frame, order: u32) { + let mut freelist = FREELIST.lock(); + let mut largest_order = order; + + let mut current = orig_frame; + + for merge_order in order..MAX_ORDER { + // Because there's a PageInfo, this frame must be allocator-owned. We need to be very + // careful with who owns this page, as the refcount can be anything from 0 (undefined) to + // 2^addrwidth - 1. However, allocation and deallocation must be synchronized (the "next" + // word of the PageInfo). + + let sibling = Frame::containing(PhysicalAddress::new( + current.base().data() ^ (PAGE_SIZE << merge_order), + )); + + let Some(_cur_info) = get_page_info(current) else { + unreachable!("attempting to free non-allocator-owned page"); + }; + + let Some(sib_info) = get_page_info(sibling) else { + // The frame that was deallocated, was at the unaligned start or end of its section + // (i.e. there aren't 1 << merge_order additional pages). + break; + }; + + let PageInfoKind::Free(sib_info) = sib_info.kind() else { + // The frame is currently in use (refcounted). It cannot be merged! + break; + }; + + // If the sibling p2frame has lower order than merge_order, it cannot be merged into + // current. 
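        // (Illustrative case: freeing an order-1 block whose buddy was split and only
        // partially freed at order 0 must stop merging here, since merging would also
        // absorb the buddy's still-allocated half.)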
+ if sib_info.next().order() < merge_order { + break; + } + debug_assert!( + !(sib_info.next().order() > merge_order), + "sibling page has unaligned order or contains current page" + ); + //log::info!("MERGED {lo:?} WITH {hi:?} ORDER {order}"); + + if let Some(sib_prev) = sib_info.prev().frame() { + get_free_alloc_page_info(sib_prev).set_next(sib_info.next()); + } else { + debug_assert_eq!(freelist.for_orders[merge_order as usize], Some(sibling)); + debug_assert!(sib_info + .next() + .frame() + .map_or(true, |f| f.is_aligned_to_order(merge_order))); + debug_assert_eq!(sib_info.next().order(), merge_order); + freelist.for_orders[merge_order as usize] = sib_info.next().frame(); + } + if let Some(sib_next) = sib_info.next().frame() { + get_free_alloc_page_info(sib_next).set_prev(sib_info.prev()); } + + current = Frame::containing(PhysicalAddress::new( + current.base().data() & !(PAGE_SIZE << merge_order), + )); + + largest_order = merge_order + 1; } + get_page_info(current) + .expect("freeing frame without PageInfo") + .make_free(largest_order); + + let new_head = current; + debug_assert!(new_head.is_aligned_to_order(largest_order)); + + if let Some(old_head) = freelist.for_orders[largest_order as usize].replace(new_head) { + //log::info!("HEAD {:p} FREED {:p} BARRIER {:p}", get_page_info(old_head).unwrap(), get_page_info(frame).unwrap(), unsafe { ALLOCATOR_DATA.abs_off as *const u8 }); + let old_head_info = get_free_alloc_page_info(old_head); + let new_head_info = get_free_alloc_page_info(new_head); + + new_head_info.set_next(P2Frame::new(Some(old_head), largest_order)); + new_head_info.set_prev(P2Frame::new(None, largest_order)); + old_head_info.set_prev(P2Frame::new(Some(new_head), largest_order)); + } + + //log::info!("FREED {frame:?}+2^{order}"); + freelist.used_frames -= 1 << order; } -impl Iterator for MemoryAreaIter { - type Item = &'static MemoryArea; - fn next(&mut self) -> Option { - while self.i < unsafe { MEMORY_MAP.len() } { - let entry = unsafe { &MEMORY_MAP[self.i] }; - self.i += 1; - if entry._type == self._type { - return Some(entry); - } - } - None +pub unsafe fn deallocate_frame(frame: Frame) { + deallocate_p2frame(frame, 0) +} + +// Helper function for quickly mapping device memory +pub unsafe fn map_device_memory(addr: PhysicalAddress, len: usize) -> VirtualAddress { + let mut mapper_lock = KernelMapper::lock(); + let mapper = mapper_lock + .get_mut() + .expect("KernelMapper mapper locked re-entrant in map_device_memory"); + let base = PhysicalAddress::new(crate::paging::round_down_pages(addr.data())); + let aligned_len = crate::paging::round_up_pages(len + (addr.data() - base.data())); + for page_idx in 0..aligned_len / crate::memory::PAGE_SIZE { + let (_, flush) = mapper + .map_linearly( + base.add(page_idx * crate::memory::PAGE_SIZE), + PageFlags::new() + .write(true) + .custom_flag(EntryFlags::NO_CACHE.bits(), true), + ) + .expect("failed to linearly map SDT"); + flush.flush(); } + RmmA::phys_to_virt(addr) } -static ALLOCATOR: Mutex>> = Mutex::new(None); +const ORDER_COUNT: u32 = 11; +const MAX_ORDER: u32 = ORDER_COUNT - 1; -/// Init memory module -/// Must be called once, and only once, -pub unsafe fn init(kernel_start: usize, kernel_end: usize) { - // Copy memory map from bootloader location - for (i, entry) in MEMORY_MAP.iter_mut().enumerate() { - *entry = *(0x500 as *const MemoryArea).offset(i as isize); - if entry._type != MEMORY_AREA_NULL { - println!("{:?}", entry); - } +#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct Frame { + // On 
x86/x86_64, all memory below 1 MiB is reserved, and although some frames in that range + // may end up in the paging code, it's very unlikely that frame 0x0 would. + physaddr: NonZeroUsize, +} + +/// Option combined with power-of-two size. +#[derive(Clone, Copy)] +struct P2Frame(usize); +impl P2Frame { + fn new(frame: Option, order: u32) -> Self { + Self(frame.map_or(0, |f| f.physaddr.get()) | (order as usize)) + } + fn get(self) -> (Option, u32) { + let page_off_mask = PAGE_SIZE - 1; + ( + NonZeroUsize::new(self.0 & !page_off_mask & !RC_USED_NOT_FREE) + .map(|physaddr| Frame { physaddr }), + (self.0 & page_off_mask) as u32, + ) + } + fn frame(self) -> Option { + self.get().0 } + fn order(self) -> u32 { + self.get().1 + } +} +impl core::fmt::Debug for P2Frame { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let (frame, order) = self.get(); + write!(f, "[frame at {frame:?}] order {order}") + } +} - *ALLOCATOR.lock() = Some(RecycleAllocator::new(BumpAllocator::new(kernel_start, kernel_end, MemoryAreaIter::new(MEMORY_AREA_FREE)))); +impl core::fmt::Debug for Frame { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "[frame at {:p}]", self.base().data() as *const u8) + } } -/// Init memory module after core -/// Must be called once, and only once, -pub unsafe fn init_noncore() { - if let Some(ref mut allocator) = *ALLOCATOR.lock() { - allocator.set_noncore(true) - } else { - panic!("frame allocator not initialized"); +impl Frame { + /// Create a frame containing `address` + pub fn containing(address: PhysicalAddress) -> Frame { + Frame { + physaddr: NonZeroUsize::new(address.data() & !PAGE_MASK) + .expect("frame 0x0 is reserved"), + } + } + + /// Get the address of this frame + pub fn base(self) -> PhysicalAddress { + PhysicalAddress::new(self.physaddr.get()) + } + + //TODO: Set private + pub fn range_inclusive(start: Frame, end: Frame) -> impl Iterator { + (start.physaddr.get()..=end.physaddr.get()) + .step_by(PAGE_SIZE) + .map(|number| Frame { + physaddr: NonZeroUsize::new(number).unwrap(), + }) + } + #[track_caller] + pub fn next_by(self, n: usize) -> Self { + Self { + physaddr: self + .physaddr + .get() + .checked_add(n * PAGE_SIZE) + .and_then(NonZeroUsize::new) + .expect("overflow or null in Frame::next_by"), + } + } + pub fn offset_from(self, from: Self) -> usize { + self.physaddr + .get() + .checked_sub(from.physaddr.get()) + .expect("overflow in Frame::offset_from") + / PAGE_SIZE + } + pub fn is_aligned_to_order(self, order: u32) -> bool { + self.base().data() % (PAGE_SIZE << order) == 0 } } -/// Get the number of frames available -pub fn free_frames() -> usize { - if let Some(ref allocator) = *ALLOCATOR.lock() { - allocator.free_frames() - } else { - panic!("frame allocator not initialized"); +#[derive(Debug)] +pub struct Enomem; + +impl From for Error { + fn from(_: Enomem) -> Self { + Self::new(ENOMEM) } } -/// Get the number of frames used -pub fn used_frames() -> usize { - if let Some(ref allocator) = *ALLOCATOR.lock() { - allocator.used_frames() - } else { - panic!("frame allocator not initialized"); +#[derive(Debug)] +pub struct RaiiFrame { + inner: Frame, +} +impl RaiiFrame { + pub fn allocate() -> Result { + init_frame(RefCount::One) + .map_err(|_| Enomem) + .map(|inner| Self { inner }) + } + pub unsafe fn new_unchecked(inner: Frame) -> Self { + Self { inner } + } + pub fn get(&self) -> Frame { + self.inner + } + pub fn take(self) -> Frame { + let f = self.get(); + core::mem::forget(self); + f } } -/// Allocate a 
range of frames -pub fn allocate_frames(count: usize) -> Option { - if let Some(ref mut allocator) = *ALLOCATOR.lock() { - allocator.allocate_frames(count) - } else { - panic!("frame allocator not initialized"); +impl Drop for RaiiFrame { + fn drop(&mut self) { + if get_page_info(self.inner) + .expect("RaiiFrame lacking PageInfo") + .remove_ref() + == None + { + unsafe { + deallocate_frame(self.inner); + } + } } } -/// Deallocate a range of frames frame -pub fn deallocate_frames(frame: Frame, count: usize) { - if let Some(ref mut allocator) = *ALLOCATOR.lock() { - allocator.deallocate_frames(frame, count) - } else { - panic!("frame allocator not initialized"); +// TODO: Make PageInfo a union, since *every* allocated page will have an associated PageInfo. +// Pages that aren't AddrSpace data pages, such as paging-structure pages, might use the memory +// occupied by a PageInfo for something else, potentially allowing paging structure-level CoW too. +// +// TODO: Another interesting possibility would be to use a slab allocator for (ideally +// power-of-two) allocations smaller than a page, in which case this PageInfo might store a bitmap +// of used sub-allocations. +// +// TODO: Alternatively or in conjunction, the PageInfo can store the number of used entries for +// each page table, possibly even recursively (total number of mapped pages). +// NOTE: init_sections depends on the default initialized value consisting of all zero bytes. +#[derive(Debug)] +pub struct PageInfo { + /// Stores the reference count to this page, i.e. the number of present page table entries that + /// point to this particular frame. + /// + /// Bits 0..=N-1 are used for the actual reference count, whereas bit N-1 indicates the page is + /// shared if set, and CoW if unset. The flag is not meaningful when the refcount is 0 or 1. + pub refcount: AtomicUsize, + + // TODO: Add one flag indicating whether the page contents is zeroed? Or should this primarily + // be managed by the memory allocator first? + pub next: AtomicUsize, +} + +enum PageInfoKind<'info> { + Used(PageInfoUsed<'info>), + Free(PageInfoFree<'info>), +} +struct PageInfoUsed<'info> { + _refcount: &'info AtomicUsize, + _misc: &'info AtomicUsize, +} +struct PageInfoFree<'info> { + prev: &'info AtomicUsize, + next: &'info AtomicUsize, +} + +// There should be at least 2 bits available; even with a 4k page size on a 32-bit system (where a +// paging structure node is itself a 4k page size, i.e. on i386 with 1024 32-bit entries), there +// simply cannot be more than 2^30 entries pointing to the same page. However, to be able to use +// fetch_add safely, we reserve another bit (which makes fetch_add safe if properly reverted, and +// there aren't more than 2^(BITS-2) CPUs on the system). + +// Indicates whether the page is free (and thus managed by the allocator), or owned (and thus +// managed by the kernel heap, or most commonly, the virtual memory system). The refcount may +// increase or decrease with fetch_add, but must never flip this bit. +const RC_USED_NOT_FREE: usize = 1 << (usize::BITS - 1); + +// Only valid if RC_USED. Controls whether the page is CoW (map readonly, on page fault, copy and +// remap writable) or shared (mapped writable in the first place). +const RC_SHARED_NOT_COW: usize = 1 << (usize::BITS - 2); + +// The page refcount limit. This acts as a buffer zone allowing subsequent fetch_sub to correct +// overflow, which works as long as there's fewer CPUs than RC_MAX itself (and interrupts are +// disabled). 
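// Concretely, on a 64-bit target (illustrative): bit 63 is RC_USED_NOT_FREE, bit 62 is
// RC_SHARED_NOT_COW, and the count lives in bits 0..=61, so RC_MAX below is 2^61 and a
// transient over-increment from a racing fetch_add still stays far away from the flag bits.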
+const RC_MAX: usize = 1 << (usize::BITS - 3); + +const RC_COUNT_MASK: usize = !(RC_USED_NOT_FREE | RC_SHARED_NOT_COW); + +// TODO: Use some of the flag bits as a tag, indicating the type of page (e.g. paging structure, +// userspace data page, or kernel heap page). This could be done only when debug assertions are +// enabled. +bitflags::bitflags! { + #[derive(Debug)] + pub struct FrameFlags: usize { + const NONE = 0; } } -/// A frame, allocated by the frame allocator. -/// Do not add more derives, or make anything `pub`! -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -pub struct Frame { - number: usize +static mut ALLOCATOR_DATA: AllocatorData = AllocatorData { + sections: &[], + abs_off: 0, +}; + +struct AllocatorData { + // TODO: Memory hotplugging? + sections: &'static [Section], + abs_off: usize, } +#[derive(Debug)] +struct FreeList { + for_orders: [Option; ORDER_COUNT as usize], + used_frames: usize, +} +static FREELIST: Mutex = Mutex::new(FreeList { + for_orders: [None; ORDER_COUNT as usize], + used_frames: 0, +}); -impl Frame { - /// Get the address of this frame - pub fn start_address(&self) -> PhysicalAddress { - PhysicalAddress::new(self.number * PAGE_SIZE) +pub struct Section { + base: Frame, + frames: &'static [PageInfo], +} + +pub const MAX_SECTION_SIZE_BITS: u32 = 27; +pub const MAX_SECTION_SIZE: usize = 1 << MAX_SECTION_SIZE_BITS; +pub const MAX_SECTION_PAGE_COUNT: usize = MAX_SECTION_SIZE / PAGE_SIZE; + +const _: () = { + assert!(mem::size_of::().is_power_of_two()); +}; + +#[cold] +fn init_sections(mut allocator: BumpAllocator) { + let (free_areas, offset_into_first_free_area) = allocator.free_areas(); + + let free_areas_iter = || { + free_areas.iter().copied().enumerate().map(|(i, area)| { + if i == 0 { + rmm::MemoryArea { + base: area.base.add(offset_into_first_free_area), + size: area.size - offset_into_first_free_area, + } + } else { + area + } + }) + }; + + let sections: &'static mut [Section] = { + let max_section_count: usize = free_areas_iter() + .map(|area| { + let aligned_end = area + .base + .add(area.size) + .data() + .next_multiple_of(MAX_SECTION_SIZE); + let aligned_start = area.base.data() / MAX_SECTION_SIZE * MAX_SECTION_SIZE; + + (aligned_end - aligned_start) / MAX_SECTION_SIZE + }) + .sum(); + let section_array_page_count = + (max_section_count * mem::size_of::
()).div_ceil(PAGE_SIZE); + + unsafe { + let base = allocator + .allocate(FrameCount::new(section_array_page_count)) + .expect("failed to allocate sections array"); + core::slice::from_raw_parts_mut( + RmmA::phys_to_virt(base).data() as *mut Section, + max_section_count, + ) + } + }; + + let mut iter = free_areas_iter().peekable(); + + let mut i = 0; + + while let Some(mut memory_map_area) = iter.next() { + // TODO: NonZeroUsize + + // TODO: x86_32 fails without this check + if memory_map_area.size == 0 { + continue; + } + + assert_ne!( + memory_map_area.size, 0, + "RMM should enforce areas are not zeroed" + ); + + // TODO: Should RMM do this? + + while let Some(next_area) = iter.peek() + && next_area.base == memory_map_area.base.add(memory_map_area.size) + { + memory_map_area.size += next_area.size; + let _ = iter.next(); + } + + assert_eq!( + memory_map_area.base.data() % PAGE_SIZE, + 0, + "RMM should enforce area alignment" + ); + assert_eq!( + memory_map_area.size % PAGE_SIZE, + 0, + "RMM should enforce area length alignment" + ); + + let mut pages_left = memory_map_area.size.div_floor(PAGE_SIZE); + let mut base = Frame::containing(memory_map_area.base); + + while pages_left > 0 { + let page_info_max_count = core::cmp::min(pages_left, MAX_SECTION_PAGE_COUNT); + let pages_to_next_section = + (MAX_SECTION_SIZE - (base.base().data() % MAX_SECTION_SIZE)) / PAGE_SIZE; + let page_info_count = core::cmp::min(page_info_max_count, pages_to_next_section); + + let page_info_array_size_pages = + (page_info_count * mem::size_of::()).div_ceil(PAGE_SIZE); + let page_info_array = unsafe { + let base = allocator + .allocate(FrameCount::new(page_info_array_size_pages)) + .expect("failed to allocate page info array"); + core::slice::from_raw_parts_mut( + RmmA::phys_to_virt(base).data() as *mut PageInfo, + page_info_count, + ) + }; + for p in &*page_info_array { + assert_eq!(p.next.load(Ordering::Relaxed), 0); + assert_eq!(p.refcount.load(Ordering::Relaxed), 0); + } + + sections[i] = Section { + base, + frames: page_info_array, + }; + i += 1; + + pages_left -= page_info_count; + base = base.next_by(page_info_count); + } } + let sections = &mut sections[..i]; - //TODO: Set private - pub fn clone(&self) -> Frame { - Frame { - number: self.number + sections.sort_unstable_by_key(|s| s.base); + + // The bump allocator has been used during the section array and page info array allocation + // phases, which means some of the PageInfos will be pointing to those arrays themselves. + // Mark those pages as used! 
+ 'sections: for section in &*sections { + for (off, page_info) in section.frames.iter().enumerate() { + let frame = section.base.next_by(off); + if frame.base() >= allocator.abs_offset() { + break 'sections; + } + //log::info!("MARKING {frame:?} AS USED"); + page_info + .refcount + .store(RC_USED_NOT_FREE, Ordering::Relaxed); + page_info.next.store(0, Ordering::Relaxed); } } - /// Create a frame containing `address` - pub fn containing_address(address: PhysicalAddress) -> Frame { - Frame { - number: address.get() / PAGE_SIZE + let mut first_pages: [Option<(Frame, &'static PageInfo)>; ORDER_COUNT as usize] = + [None; ORDER_COUNT as usize]; + let mut last_pages = first_pages; + + let mut append_page = |page: Frame, info: &'static PageInfo, order| { + let this_page = (page, info); + + if page.base() < allocator.abs_offset() { + return; } + debug_assert!(info.as_free().is_some()); + debug_assert!(this_page.0.is_aligned_to_order(order)); + debug_assert_eq!(info.next.load(Ordering::Relaxed), order as usize); + debug_assert_eq!(info.refcount.load(Ordering::Relaxed), 0); + + let last_page = last_pages[order as usize].replace(this_page); + + if let Some((last_frame, last_page_info)) = last_page { + let last_info = last_page_info.as_free().unwrap(); + + debug_assert_eq!(last_info.next().order(), order); + debug_assert_eq!(last_info.next().frame(), None); + + last_info.set_next(P2Frame::new(Some(page), order)); + info.as_free() + .unwrap() + .set_prev(P2Frame::new(Some(last_frame), order)); + } else { + first_pages[order as usize] = Some(this_page); + info.as_free().unwrap().set_prev(P2Frame::new(None, order)); + info.as_free().unwrap().set_next(P2Frame::new(None, order)); + } + }; + unsafe { + ALLOCATOR_DATA = AllocatorData { + sections, + abs_off: allocator.abs_offset().data(), + }; } - //TODO: Set private - pub fn range_inclusive(start: Frame, end: Frame) -> FrameIter { - FrameIter { - start: start, - end: end, + for section in &*sections { + let mut base = section.base; + let mut frames = section.frames; + + for order in 0..=MAX_ORDER { + let pages_for_current_order = 1 << order; + + debug_assert_eq!(frames.len() % pages_for_current_order, 0); + debug_assert!(base.is_aligned_to_order(order)); + + if !frames.is_empty() && order != MAX_ORDER && !base.is_aligned_to_order(order + 1) { + frames[0].next.store(order as usize, Ordering::Relaxed); + // The first section page is not aligned to the next order size. + + //log::info!("ORDER {order}: FIRST {base:?}"); + append_page(base, &frames[0], order); + + base = base.next_by(pages_for_current_order); + frames = &frames[pages_for_current_order..]; + } else { + //log::info!("ORDER {order}: FIRST SKIP"); + } + + if !frames.is_empty() + && order != MAX_ORDER + && !base.next_by(frames.len()).is_aligned_to_order(order + 1) + { + // The last section page is not aligned to the next order size. 
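// Worked example (illustrative, not from the original source): a section covering
// frames 3..=15 (13 pages) is split by this loop into one order-0 block (frame 3),
// one order-2 block (frames 4..=7) and one order-3 block (frames 8..=15), assuming
// MAX_ORDER >= 3; the front and back trimming keeps every block aligned to its own order.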
+ + let off = frames.len() - pages_for_current_order; + let final_page = base.next_by(off); + + frames[off].next.store(order as usize, Ordering::Relaxed); + + //log::info!("ORDER {order}: LAST {final_page:?}"); + append_page(final_page, &frames[off], order); + + frames = &frames[..off]; + } else { + //log::info!("ORDER {order}: LAST SKIP"); + } + + if frames.is_empty() { + break; + } + + if order == MAX_ORDER { + debug_assert_eq!(frames.len() % pages_for_current_order, 0); + debug_assert!(base.is_aligned_to_order(MAX_ORDER)); + + for (off, info) in frames.iter().enumerate().step_by(pages_for_current_order) { + info.next.store(MAX_ORDER as usize, Ordering::Relaxed); + append_page(base.next_by(off), info, MAX_ORDER); + } + } } + + //log::info!("SECTION from {:?}, {} pages, array at {:p}", section.base, section.frames.len(), section.frames); } + for (order, tuple_opt) in last_pages.iter().enumerate() { + let Some((frame, info)) = tuple_opt else { + continue; + }; + debug_assert!(frame.is_aligned_to_order(order as u32)); + let free = info.as_free().unwrap(); + debug_assert_eq!(free.prev().order(), order as u32); + free.set_next(P2Frame::new(None, order as u32)); + } + + FREELIST.lock().for_orders = first_pages.map(|pair| pair.map(|(frame, _)| frame)); + + //debug_freelist(); + log::info!("Initial freelist consistent"); } -pub struct FrameIter { - start: Frame, - end: Frame, +#[cold] +pub fn init_mm(allocator: BumpAllocator) { + init_sections(allocator); + + unsafe { + let the_frame = allocate_frame().expect("failed to allocate static zeroed frame"); + let the_info = get_page_info(the_frame).expect("static zeroed frame had no PageInfo"); + the_info + .refcount + .store(RefCount::One.to_raw(), Ordering::Relaxed); + + THE_ZEROED_FRAME.get().write(Some((the_frame, the_info))); + } +} +#[derive(Debug, PartialEq)] +pub enum AddRefError { + CowToShared, + SharedToCow, + RcOverflow, } +impl PageInfo { + fn kind(&self) -> PageInfoKind<'_> { + let prev = self.refcount.load(Ordering::Relaxed); -impl Iterator for FrameIter { - type Item = Frame; + if prev & RC_USED_NOT_FREE == RC_USED_NOT_FREE { + PageInfoKind::Used(PageInfoUsed { + _refcount: &self.refcount, + _misc: &self.next, + }) + } else { + PageInfoKind::Free(PageInfoFree { + prev: &self.refcount, + next: &self.next, + }) + } + } + fn as_free(&self) -> Option> { + match self.kind() { + PageInfoKind::Free(f) => Some(f), + PageInfoKind::Used(_) => None, + } + } + pub fn add_ref(&self, kind: RefKind) -> Result<(), AddRefError> { + match (self.refcount().expect("cannot add_ref to free frame"), kind) { + (RefCount::One, RefKind::Cow) => { + self.refcount.store(RC_USED_NOT_FREE | 1, Ordering::Relaxed) + } + (RefCount::One, RefKind::Shared) => self + .refcount + .store(RC_USED_NOT_FREE | 1 | RC_SHARED_NOT_COW, Ordering::Relaxed), + (RefCount::Cow(_), RefKind::Cow) | (RefCount::Shared(_), RefKind::Shared) => { + let old = self.refcount.fetch_add(1, Ordering::Relaxed); - fn next(&mut self) -> Option { - if self.start <= self.end { - let frame = self.start.clone(); - self.start.number += 1; - Some(frame) + if (old & RC_COUNT_MASK) >= RC_MAX { + self.refcount.fetch_sub(1, Ordering::Relaxed); + return Err(AddRefError::RcOverflow); + } + } + (RefCount::Cow(_), RefKind::Shared) => return Err(AddRefError::CowToShared), + (RefCount::Shared(_), RefKind::Cow) => return Err(AddRefError::SharedToCow), + } + Ok(()) + } + #[must_use = "must deallocate if refcount reaches None"] + pub fn remove_ref(&self) -> Option { + match self.refcount() { + None => panic!("refcount 
was already zero when calling remove_ref!"), + Some(RefCount::One) => { + // Used to be RC_USED_NOT_FREE | ?RC_SHARED_NOT_COW | 0, now becomes 0 + //self.refcount.store(0, Ordering::Relaxed); + + None + } + Some(RefCount::Cow(_) | RefCount::Shared(_)) => RefCount::from_raw({ + // Used to be RC_USED_NOT_FREE | ?RC_SHARED_NOW_COW | n, now becomes + // RC_USED_NOT_FREE | ?RC_SHARED_NOW_COW | n - 1 + (self.refcount.fetch_sub(1, Ordering::Relaxed) - 1) | RC_USED_NOT_FREE + }), + } + } + #[track_caller] + pub fn allows_writable(&self) -> bool { + match self + .refcount() + .expect("using allows_writable on free page!") + { + RefCount::One => true, + RefCount::Cow(_) => false, + RefCount::Shared(_) => true, + } + } + + pub fn refcount(&self) -> Option { + let refcount = self.refcount.load(Ordering::Relaxed); + + RefCount::from_raw(refcount) + } + fn make_free(&self, order: u32) -> PageInfoFree<'_> { + // Order needs to be known so we don't for example merge A: [A] A A A B: [B] U U U into a + // 2^3 page (if U indicates "used"). + self.refcount.store(order as usize, Ordering::Relaxed); + self.next.store(order as usize, Ordering::Relaxed); + + PageInfoFree { + next: &self.next, + prev: &self.refcount, + } + } +} +impl PageInfoFree<'_> { + fn next(&self) -> P2Frame { + P2Frame(self.next.load(Ordering::Relaxed)) + } + #[track_caller] + fn set_next(&self, next: P2Frame) { + debug_assert!(next + .frame() + .map_or(true, |f| f.is_aligned_to_order(next.order()))); + self.next.store(next.0, Ordering::Relaxed) + } + fn prev(&self) -> P2Frame { + P2Frame(self.prev.load(Ordering::Relaxed)) + } + fn set_prev(&self, prev: P2Frame) { + debug_assert!(prev + .frame() + .map_or(true, |f| f.is_aligned_to_order(prev.order()))); + self.prev.store(prev.0, Ordering::Relaxed) + } + fn mark_used(&self) { + // Order is irrelevant if marked "used" + self.prev.store(RC_USED_NOT_FREE, Ordering::Relaxed); + self.next.store(0, Ordering::Relaxed); + } +} +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum RefKind { + Cow, + Shared, + // TODO: Observer? +} +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum RefCount { + One, + Shared(NonZeroUsize), + Cow(NonZeroUsize), +} +impl RefCount { + pub fn from_raw(raw: usize) -> Option { + if raw & RC_USED_NOT_FREE != RC_USED_NOT_FREE { + return None; + } + let refcount_minus_one = raw & !(RC_SHARED_NOT_COW | RC_USED_NOT_FREE); + let nz_refcount = NonZeroUsize::new(refcount_minus_one + 1).unwrap(); + + Some(if nz_refcount.get() == 1 { + RefCount::One + } else if raw & RC_SHARED_NOT_COW == RC_SHARED_NOT_COW { + RefCount::Shared(nz_refcount) } else { - None + RefCount::Cow(nz_refcount) + }) + } + pub fn to_raw(self) -> usize { + match self { + Self::One => 0 | RC_USED_NOT_FREE, + Self::Shared(inner) => (inner.get() - 1) | RC_SHARED_NOT_COW | RC_USED_NOT_FREE, + Self::Cow(inner) => (inner.get() - 1) | RC_USED_NOT_FREE, + } + } +} +#[inline] +fn sections() -> &'static [Section] { + unsafe { ALLOCATOR_DATA.sections } +} +pub fn get_page_info(frame: Frame) -> Option<&'static PageInfo> { + let sections = sections(); + + let idx_res = sections.binary_search_by_key(&frame, |section| section.base); + + if idx_res == Err(0) { + // The frame is before the first section + return None; + } + + // binary_search_by_key returns either Ok(where it was found) or Err(where it would have been + // inserted). The base obviously cannot have been exactly matched from an entry at an + // out-of-bounds index, so the only Err(i) where i - 1 is out of bounds, is for i=0. That + // has already been checked. 
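// For example (illustrative): with section bases [16, 64, 256] and a frame numbered 80,
// binary_search_by_key returns Err(2), so the candidate is sections[1] (base 64); the
// `.get(...)` below then rejects the frame if it lies beyond that section's PageInfo array.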
+ let section = &sections[idx_res.unwrap_or_else(|e| e - 1)]; + + section.frames.get(frame.offset_from(section.base)) + + /* + sections + .range(..=frame) + .next_back() + .filter(|(base, section)| frame <= base.next_by(section.frames.len())) + .map(|(base, section)| PageInfoHandle { section, idx: frame.offset_from(*base) }) + */ +} + +#[track_caller] +fn get_free_alloc_page_info(frame: Frame) -> PageInfoFree<'static> { + let i = get_page_info(frame).unwrap_or_else(|| { + panic!("allocator-owned frames need a PageInfo, but none for {frame:?}") + }); + i.as_free().unwrap() //.unwrap_or_else(|| panic!("expected frame to be free, but {frame:?} wasn't, in {i:?}")) +} + +pub struct Segv; + +bitflags! { + /// Arch-generic page fault flags, modeled after x86's error code. + /// + /// This may change when arch-specific features are utilized better. + pub struct GenericPfFlags: u32 { + const PRESENT = 1 << 0; + const INVOLVED_WRITE = 1 << 1; + const USER_NOT_SUPERVISOR = 1 << 2; + const INSTR_NOT_DATA = 1 << 3; + // "reserved bits" on x86 + const INVL = 1 << 31; + } +} + +pub trait ArchIntCtx { + fn ip(&self) -> usize; + fn recover_and_efault(&mut self); +} + +pub fn page_fault_handler( + stack: &mut impl ArchIntCtx, + code: GenericPfFlags, + faulting_address: VirtualAddress, +) -> Result<(), Segv> { + let faulting_page = Page::containing_address(faulting_address); + + let usercopy_region = __usercopy_start()..__usercopy_end(); + + // TODO: Most likely not necessary, but maybe also check that the faulting address is not too + // close to USER_END. + let address_is_user = faulting_address.kind() == TableKind::User; + + let invalid_page_tables = code.contains(GenericPfFlags::INVL); + let caused_by_user = code.contains(GenericPfFlags::USER_NOT_SUPERVISOR); + let caused_by_kernel = !caused_by_user; + let caused_by_write = code.contains(GenericPfFlags::INVOLVED_WRITE); + let caused_by_instr_fetch = code.contains(GenericPfFlags::INSTR_NOT_DATA); + let is_usercopy = usercopy_region.contains(&stack.ip()); + + let mode = match (caused_by_write, caused_by_instr_fetch) { + (true, false) => AccessMode::Write, + (false, false) => AccessMode::Read, + (false, true) => AccessMode::InstrFetch, + (true, true) => { + unreachable!("page fault cannot be caused by both instruction fetch and write") + } + }; + + if invalid_page_tables { + // TODO: Better error code than Segv?
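// (INVL, i.e. invalid_page_tables, corresponds on x86 to the RSVD error-code bit: a
// reserved bit was set in a paging structure, so the tables themselves are corrupt and
// demand paging cannot help.)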
+ return Err(Segv); + } + + if address_is_user && (caused_by_user || is_usercopy) { + match context::memory::try_correcting_page_tables(faulting_page, mode) { + Ok(()) => return Ok(()), + Err(PfError::Oom) => todo!("oom"), + Err(PfError::Segv | PfError::RecursionLimitExceeded) => (), + Err(PfError::NonfatalInternalError) => todo!(), } } + + if address_is_user && caused_by_kernel && mode != AccessMode::InstrFetch && is_usercopy { + stack.recover_and_efault(); + return Ok(()); + } + + Err(Segv) } +static THE_ZEROED_FRAME: SyncUnsafeCell> = + SyncUnsafeCell::new(None); -pub trait FrameAllocator { - fn set_noncore(&mut self, noncore: bool); - fn free_frames(&self) -> usize; - fn used_frames(&self) -> usize; - fn allocate_frames(&mut self, size: usize) -> Option; - fn deallocate_frames(&mut self, frame: Frame, size: usize); +pub fn the_zeroed_frame() -> (Frame, &'static PageInfo) { + unsafe { + THE_ZEROED_FRAME + .get() + .read() + .expect("zeroed frame must be initialized") + } +} + +pub fn init_frame(init_rc: RefCount) -> Result { + let new_frame = allocate_frame().ok_or(PfError::Oom)?; + let page_info = get_page_info(new_frame).unwrap_or_else(|| { + panic!( + "all allocated frames need an associated page info, {:?} didn't", + new_frame + ) + }); + debug_assert_eq!(page_info.refcount(), Some(RefCount::One)); + page_info + .refcount + .store(init_rc.to_raw(), Ordering::Relaxed); + + Ok(new_frame) +} +#[derive(Debug)] +pub struct TheFrameAllocator; + +impl FrameAllocator for TheFrameAllocator { + unsafe fn allocate(&mut self, count: FrameCount) -> Option { + let order = count.data().next_power_of_two().trailing_zeros(); + allocate_p2frame(order).map(|f| f.base()) + } + unsafe fn free(&mut self, address: PhysicalAddress, count: FrameCount) { + let order = count.data().next_power_of_two().trailing_zeros(); + deallocate_p2frame(Frame::containing(address), order) + } + unsafe fn usage(&self) -> FrameUsage { + FrameUsage::new( + FrameCount::new(used_frames()), + FrameCount::new(total_frames()), + ) + } } diff --git a/src/memory/recycle.rs b/src/memory/recycle.rs deleted file mode 100644 index 31f4cc98..00000000 --- a/src/memory/recycle.rs +++ /dev/null @@ -1,122 +0,0 @@ -//! Recycle allocator -//! Uses freed frames if possible, then uses inner allocator - -use alloc::vec::Vec; - -use crate::paging::PhysicalAddress; - -use super::{Frame, FrameAllocator}; - -pub struct RecycleAllocator { - inner: T, - noncore: bool, - free: Vec<(usize, usize)>, -} - -impl RecycleAllocator { - pub fn new(inner: T) -> Self { - Self { - inner: inner, - noncore: false, - free: Vec::new(), - } - } - - fn free_count(&self) -> usize { - let mut count = 0; - for free in self.free.iter() { - count += free.1; - } - count - } - - fn merge(&mut self, address: usize, count: usize) -> bool { - for i in 0 .. 
self.free.len() { - let changed = { - let free = &mut self.free[i]; - if address + count * 4096 == free.0 { - free.0 = address; - free.1 += count; - true - } else if free.0 + free.1 * 4096 == address { - free.1 += count; - true - } else { - false - } - }; - - if changed { - //TODO: Use do not use recursion - let (address, count) = self.free[i]; - if self.merge(address, count) { - self.free.remove(i); - } - return true; - } - } - - false - } -} - -impl FrameAllocator for RecycleAllocator { - fn set_noncore(&mut self, noncore: bool) { - self.noncore = noncore; - } - - fn free_frames(&self) -> usize { - self.inner.free_frames() + self.free_count() - } - - fn used_frames(&self) -> usize { - self.inner.used_frames() - self.free_count() - } - - fn allocate_frames(&mut self, count: usize) -> Option { - let mut small_i = None; - { - let mut small = (0, 0); - for i in 0..self.free.len() { - let free = self.free[i]; - // Later entries can be removed faster - if free.1 >= count { - if free.1 <= small.1 || small_i.is_none() { - small_i = Some(i); - small = free; - } - } - } - } - - if let Some(i) = small_i { - let (address, remove) = { - let free = &mut self.free[i]; - free.1 -= count; - (free.0 + free.1 * 4096, free.1 == 0) - }; - - if remove { - self.free.remove(i); - } - - //println!("Restoring frame {:?}, {}", frame, count); - Some(Frame::containing_address(PhysicalAddress::new(address))) - } else { - //println!("No saved frames {}", count); - self.inner.allocate_frames(count) - } - } - - fn deallocate_frames(&mut self, frame: Frame, count: usize) { - if self.noncore { - let address = frame.start_address().get(); - if ! self.merge(address, count) { - self.free.push((address, count)); - } - } else { - //println!("Could not save frame {:?}, {}", frame, count); - self.inner.deallocate_frames(frame, count); - } - } -} diff --git a/src/panic.rs b/src/panic.rs index 1867b4ac..8efcc3a4 100644 --- a/src/panic.rs +++ b/src/panic.rs @@ -1,39 +1,147 @@ //! Intrinsics for panic handling -use core::alloc::Layout; -use core::panic::PanicInfo; +use core::{panic::PanicInfo, slice, str, sync::atomic::Ordering}; +use goblin::elf::sym; +use rmm::VirtualAddress; +use rustc_demangle::demangle; -use crate::interrupt; - -#[lang = "eh_personality"] -#[no_mangle] -pub extern "C" fn rust_eh_personality() {} +use crate::{ + arch::{consts::USER_END_OFFSET, interrupt::trace::StackTrace}, + context, cpu_id, + elf::Elf, + interrupt, + memory::KernelMapper, + start::KERNEL_SIZE, + syscall, +}; /// Required to handle panics +#[cfg(not(test))] #[panic_handler] -#[no_mangle] -pub extern "C" fn rust_begin_unwind(info: &PanicInfo) -> ! { +fn rust_begin_unwind(info: &PanicInfo) -> ! { println!("KERNEL PANIC: {}", info); - unsafe { interrupt::stack_trace(); } + unsafe { + stack_trace(); + } + + let Some(context_lock) = context::try_current() else { + println!("CPU {}, CID ", cpu_id()); + + println!("HALT"); + loop { + unsafe { + interrupt::halt(); + } + } + }; + + println!("CPU {}, CID {:p}", cpu_id(), context_lock); + + // This could deadlock, but at this point we are going to halt anyways + { + let context = context_lock.read(); + println!("NAME: {}, DEBUG ID: {}", context.name, context.debug_id); + + if let Some([a, b, c, d, e, f]) = context.current_syscall() { + println!("SYSCALL: {}", syscall::debug::format_call(a, b, c, d, e, f)); + } + } println!("HALT"); loop { - unsafe { interrupt::halt(); } + unsafe { + interrupt::halt(); + } } } -#[lang = "oom"] -#[no_mangle] -pub extern fn rust_oom(_layout: Layout) -> ! 
{ - panic!("kernel memory allocation failed"); +/// Get a stack trace +#[inline(never)] +pub unsafe fn stack_trace() { + let mapper = KernelMapper::lock(); + + let mut frame = StackTrace::start(); + + //Maximum 64 frames + for _ in 0..64 { + if let Some(frame_) = frame { + let fp_virt = VirtualAddress::new(frame_.fp); + let pc_virt = VirtualAddress::new(frame_.pc_ptr as usize); + if fp_virt.data() >= USER_END_OFFSET + && pc_virt.data() >= USER_END_OFFSET + && (fp_virt.data() as *const usize).is_aligned() + && (pc_virt.data() as *const usize).is_aligned() + && mapper.translate(fp_virt).is_some() + && mapper.translate(pc_virt).is_some() + { + let pc = *frame_.pc_ptr; + if pc == 0 { + println!(" {:>016x}: EMPTY RETURN", frame_.fp); + break; + } else { + println!(" FP {:>016x}: PC {:>016x}", frame_.fp, pc); + symbol_trace(pc); + frame = frame_.next(); + } + } else { + println!(" {:>016x}: GUARD PAGE", frame_.fp); + break; + } + } else { + break; + } + } } +/// +/// Get a symbol +//TODO: Do not create Elf object for every symbol lookup +#[inline(never)] +pub unsafe fn symbol_trace(addr: usize) { + let kernel_ptr = crate::KERNEL_OFFSET as *const u8; + let kernel_slice = slice::from_raw_parts(kernel_ptr, KERNEL_SIZE.load(Ordering::SeqCst)); -#[allow(non_snake_case)] -#[no_mangle] -/// Required to handle panics -pub extern "C" fn _Unwind_Resume() -> ! { - loop { - unsafe { interrupt::halt(); } + if let Ok(elf) = Elf::from(kernel_slice) { + let mut strtab_opt = None; + for section in elf.sections() { + if section.sh_type == ::goblin::elf::section_header::SHT_STRTAB { + strtab_opt = Some(section); + break; + } + } + + if let Some(symbols) = elf.symbols() { + for sym in symbols { + if sym::st_type(sym.st_info) == sym::STT_FUNC + && addr >= sym.st_value as usize + && addr < (sym.st_value + sym.st_size) as usize + { + println!( + " {:>016X}+{:>04X}", + sym.st_value, + addr - sym.st_value as usize + ); + + if let Some(strtab) = strtab_opt { + let start = strtab.sh_offset as usize + sym.st_name as usize; + let mut end = start; + while end < elf.data.len() { + let b = elf.data[end]; + end += 1; + if b == 0 { + break; + } + } + + if end > start { + let sym_slice = &elf.data[start..end - 1]; + if let Ok(sym_name) = str::from_utf8(sym_slice) { + println!(" {:#}", demangle(sym_name)); + } + } + } + } + } + } } } diff --git a/src/percpu.rs b/src/percpu.rs new file mode 100644 index 00000000..e89754ef --- /dev/null +++ b/src/percpu.rs @@ -0,0 +1,194 @@ +use core::{ + cell::{Cell, RefCell}, + sync::atomic::{AtomicBool, AtomicPtr, Ordering}, +}; + +use alloc::sync::{Arc, Weak}; +use rmm::Arch; +use syscall::PtraceFlags; + +use crate::{ + context::{empty_cr3, memory::AddrSpaceWrapper, switch::ContextSwitchPercpu}, + cpu_set::{LogicalCpuId, MAX_CPU_COUNT}, + ptrace::Session, +}; + +#[cfg(feature = "sys_stat")] +use { + crate::cpu_stats::{CpuStats, CpuStatsData}, + alloc::vec::Vec, +}; + +#[cfg(feature = "syscall_debug")] +use crate::syscall::debug::SyscallDebugInfo; + +/// The percpu block, that stored all percpu variables. +pub struct PercpuBlock { + /// A unique immutable number that identifies the current CPU - used for scheduling + pub cpu_id: LogicalCpuId, + + /// Context management + pub switch_internals: ContextSwitchPercpu, + + pub current_addrsp: RefCell>>, + pub new_addrsp_tmp: Cell>>, + pub wants_tlb_shootdown: AtomicBool, + + // TODO: Put mailbox queues here, e.g. for TLB shootdown? Just be sure to 128-byte align it + // first to avoid cache invalidation. 
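// (128 bytes rather than 64 because recent x86 CPUs prefetch cache lines in adjacent
// pairs; the same constant crossbeam uses for CachePadded on x86_64.)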
+ #[cfg(feature = "profiling")] + pub profiling: Option<&'static crate::profiling::RingBuffer>, + + pub ptrace_flags: Cell, + pub ptrace_session: RefCell>>, + pub inside_syscall: Cell, + + #[cfg(feature = "syscall_debug")] + pub syscall_debug_info: Cell, + + pub misc_arch_info: crate::device::ArchPercpuMisc, + + #[cfg(feature = "sys_stat")] + pub stats: CpuStats, +} + +const NULL: AtomicPtr = AtomicPtr::new(core::ptr::null_mut()); +static ALL_PERCPU_BLOCKS: [AtomicPtr; MAX_CPU_COUNT as usize] = + [NULL; MAX_CPU_COUNT as usize]; + +#[allow(unused)] +pub unsafe fn init_tlb_shootdown(id: LogicalCpuId, block: *mut PercpuBlock) { + ALL_PERCPU_BLOCKS[id.get() as usize].store(block, Ordering::Release) +} + +#[cfg(feature = "sys_stat")] +pub fn get_all_stats() -> Vec<(LogicalCpuId, CpuStatsData)> { + let mut res = ALL_PERCPU_BLOCKS + .iter() + .filter_map(|block| unsafe { block.load(Ordering::Relaxed).as_ref() }) + .map(|block| { + let stats = &block.stats; + (block.cpu_id, stats.into()) + }) + .collect::>(); + res.sort_unstable_by_key(|(id, _stats)| id.get()); + res +} + +// PercpuBlock::current() is implemented somewhere in the arch-specific modules + +#[cfg(not(feature = "multi_core"))] +pub fn shootdown_tlb_ipi(_target: Option) {} + +#[cfg(feature = "multi_core")] +pub fn shootdown_tlb_ipi(target: Option) { + if let Some(target) = target { + let my_percpublock = PercpuBlock::current(); + assert_ne!(target, my_percpublock.cpu_id); + + let Some(percpublock) = (unsafe { + ALL_PERCPU_BLOCKS[target.get() as usize] + .load(Ordering::Acquire) + .as_ref() + }) else { + log::warn!("Trying to TLB shootdown a CPU that doesn't exist or isn't initialized."); + return; + }; + while percpublock + .wants_tlb_shootdown + .swap(true, Ordering::Release) + == true + { + // Load is faster than CAS or on x86, LOCK BTS + while percpublock.wants_tlb_shootdown.load(Ordering::Relaxed) == true { + my_percpublock.maybe_handle_tlb_shootdown(); + core::hint::spin_loop(); + } + } + + crate::ipi::ipi_single(crate::ipi::IpiKind::Tlb, target); + } else { + for id in 0..crate::cpu_count() { + // TODO: Optimize: use global counter and percpu ack counters, send IPI using + // destination shorthand "all CPUs". + shootdown_tlb_ipi(Some(LogicalCpuId::new(id))); + } + } +} +impl PercpuBlock { + pub fn maybe_handle_tlb_shootdown(&self) { + if self.wants_tlb_shootdown.swap(false, Ordering::Relaxed) == false { + return; + } + + // TODO: Finer-grained flush + unsafe { + crate::paging::RmmA::invalidate_all(); + } + + if let Some(ref addrsp) = &*self.current_addrsp.borrow() { + addrsp.tlb_ack.fetch_add(1, Ordering::Release); + } + } +} +pub unsafe fn switch_arch_hook() { + let percpu = PercpuBlock::current(); + + let cur_addrsp = percpu.current_addrsp.borrow(); + let next_addrsp = percpu.new_addrsp_tmp.take(); + + let retain_pgtbl = match (&*cur_addrsp, &next_addrsp) { + (Some(ref p), Some(ref n)) => Arc::ptr_eq(p, n), + (Some(_), None) | (None, Some(_)) => false, + (None, None) => true, + }; + if retain_pgtbl { + // If we are not switching to a different address space, we can simply return early. + } + if let Some(ref prev_addrsp) = &*cur_addrsp { + prev_addrsp + .acquire_read() + .used_by + .atomic_clear(percpu.cpu_id); + } + + drop(cur_addrsp); + + // Tell future TLB shootdown handlers that old_addrsp_tmp is no longer the current address + // space. 
+ *percpu.current_addrsp.borrow_mut() = next_addrsp; + + if let Some(next_addrsp) = &*percpu.current_addrsp.borrow() { + let next = next_addrsp.acquire_read(); + + next.used_by.atomic_set(percpu.cpu_id); + next.table.utable.make_current(); + } else { + crate::paging::RmmA::set_table(rmm::TableKind::User, empty_cr3()); + } +} +impl PercpuBlock { + pub fn init(cpu_id: LogicalCpuId) -> Self { + Self { + cpu_id, + switch_internals: Default::default(), + current_addrsp: RefCell::new(None), + new_addrsp_tmp: Cell::new(None), + wants_tlb_shootdown: AtomicBool::new(false), + ptrace_flags: Cell::new(Default::default()), + ptrace_session: RefCell::new(None), + inside_syscall: Cell::new(false), + + #[cfg(feature = "syscall_debug")] + syscall_debug_info: Cell::new(SyscallDebugInfo::default()), + + #[cfg(feature = "profiling")] + profiling: None, + + misc_arch_info: Default::default(), + + #[cfg(feature = "sys_stat")] + stats: CpuStats::default(), + } + } +} diff --git a/src/profiling.rs b/src/profiling.rs new file mode 100644 index 00000000..5d07943b --- /dev/null +++ b/src/profiling.rs @@ -0,0 +1,259 @@ +use core::{ + cell::UnsafeCell, + mem::size_of, + sync::atomic::{AtomicBool, AtomicPtr, AtomicU32, AtomicUsize, Ordering}, +}; + +use alloc::boxed::Box; + +use crate::{ + cpu_set::LogicalCpuId, + idt::Idt, + interrupt, + interrupt::{irq::aux_timer, InterruptStack}, + percpu::PercpuBlock, + syscall::{error::*, usercopy::UserSliceWo}, +}; + +const N: usize = 16 * 1024 * 1024; + +pub const HARDCODED_CPU_COUNT: u32 = 4; + +pub const PROFILER_CPU: LogicalCpuId = LogicalCpuId::new(HARDCODED_CPU_COUNT); + +pub struct RingBuffer { + head: AtomicUsize, + tail: AtomicUsize, + buf: &'static [UnsafeCell; N], + pub(crate) nmi_kcount: AtomicUsize, + pub(crate) nmi_ucount: AtomicUsize, +} + +impl RingBuffer { + unsafe fn advance_head(&self, n: usize) { + self.head.store( + self.head.load(Ordering::Acquire).wrapping_add(n), + Ordering::Release, + ); + } + unsafe fn advance_tail(&self, n: usize) { + self.tail.store( + self.tail.load(Ordering::Acquire).wrapping_add(n), + Ordering::Release, + ); + } + unsafe fn sender_owned(&self) -> [&[UnsafeCell]; 2] { + let head = self.head.load(Ordering::Acquire) % N; + let tail = self.tail.load(Ordering::Acquire) % N; + + if head <= tail { + [&self.buf[tail..], &self.buf[..head]] + } else { + [&self.buf[tail..head], &[]] + } + } + unsafe fn receiver_owned(&self) -> [&[UnsafeCell]; 2] { + let head = self.head.load(Ordering::Acquire) % N; + let tail = self.tail.load(Ordering::Acquire) % N; + + if head > tail { + [&self.buf[head..], &self.buf[..tail]] + } else { + [&self.buf[head..tail], &[]] + } + } + pub unsafe fn extend(&self, mut slice: &[usize]) -> usize { + let mut n = 0; + for mut sender_slice in self.sender_owned() { + while !slice.is_empty() && !sender_slice.is_empty() { + sender_slice[0].get().write(slice[0]); + slice = &slice[1..]; + sender_slice = &sender_slice[1..]; + n += 1; + } + } + self.advance_tail(n); + n + } + pub unsafe fn peek(&self) -> [&[usize]; 2] { + self.receiver_owned() + .map(|slice| core::slice::from_raw_parts(slice.as_ptr().cast(), slice.len())) + } + pub unsafe fn advance(&self, n: usize) { + self.advance_head(n) + } + pub fn create() -> &'static Self { + Box::leak(Box::new(Self { + head: AtomicUsize::new(0), + tail: AtomicUsize::new(0), + buf: Box::leak(unsafe { Box::new_zeroed().assume_init() }), + nmi_kcount: AtomicUsize::new(0), + nmi_ucount: AtomicUsize::new(0), + })) + } +} +const NULL: AtomicPtr = AtomicPtr::new(core::ptr::null_mut()); +pub 
static BUFS: [AtomicPtr; 4] = [NULL; 4]; + +pub const PROFILE_TOGGLEABLE: bool = true; +pub static IS_PROFILING: AtomicBool = AtomicBool::new(false); + +pub fn serio_command(index: usize, data: u8) { + if PROFILE_TOGGLEABLE { + if index == 0 && data == 30 { + // "a" key in QEMU + log::info!("Enabling profiling"); + IS_PROFILING.store(true, Ordering::SeqCst); + } else if index == 0 && data == 48 { + // "b" key + log::info!("Disabling profiling"); + IS_PROFILING.store(false, Ordering::SeqCst); + } + } +} + +pub fn drain_buffer(cpu_num: LogicalCpuId, buf: UserSliceWo) -> Result { + unsafe { + let Some(src) = BUFS + .get(cpu_num.get() as usize) + .ok_or(Error::new(EBADFD))? + .load(Ordering::Relaxed) + .as_ref() + else { + return Ok(0); + }; + let byte_slices = src.peek().map(|words| { + core::slice::from_raw_parts( + words.as_ptr().cast::(), + words.len() * size_of::(), + ) + }); + + let copied_1 = buf.copy_common_bytes_from_slice(byte_slices[0])?; + src.advance(copied_1 / size_of::()); + + let copied_2 = if let Some(remaining) = buf.advance(copied_1) { + remaining.copy_common_bytes_from_slice(byte_slices[1])? + } else { + 0 + }; + src.advance(copied_2 / size_of::()); + + Ok(copied_1 + copied_2) + } +} + +pub unsafe fn nmi_handler(stack: &InterruptStack) { + let Some(profiling) = crate::percpu::PercpuBlock::current().profiling else { + return; + }; + if !IS_PROFILING.load(Ordering::Relaxed) { + return; + } + if stack.iret.cs & 0b00 == 0b11 { + profiling.nmi_ucount.store( + profiling.nmi_ucount.load(Ordering::Relaxed) + 1, + Ordering::Relaxed, + ); + return; + } else if stack.iret.rflags & (1 << 9) != 0 { + // Interrupts were enabled, i.e. we were in kmain, so ignore. + return; + } else { + profiling.nmi_kcount.store( + profiling.nmi_kcount.load(Ordering::Relaxed) + 1, + Ordering::Relaxed, + ); + }; + + let mut buf = [0_usize; 32]; + buf[0] = stack.iret.rip & !(1 << 63); + buf[1] = x86::time::rdtsc() as usize; + + let mut bp = stack.preserved.rbp; + + let mut len = 2; + + for i in 2..32 { + if bp < crate::PHYS_OFFSET || bp.saturating_add(16) >= crate::PHYS_OFFSET + crate::PML4_SIZE + { + break; + } + let ip = ((bp + 8) as *const usize).read(); + bp = (bp as *const usize).read(); + + if ip < crate::kernel_executable_offsets::__text_start() + || ip >= crate::kernel_executable_offsets::__text_end() + { + break; + } + buf[i] = ip; + + len = i + 1; + } + + let _ = profiling.extend(&buf[..len]); +} +pub unsafe fn init() { + let percpu = PercpuBlock::current(); + + if percpu.cpu_id == PROFILER_CPU { + return; + } + + let profiling = RingBuffer::create(); + + BUFS[percpu.cpu_id.get() as usize].store( + profiling as *const _ as *mut _, + core::sync::atomic::Ordering::SeqCst, + ); + (core::ptr::addr_of!(percpu.profiling) as *mut Option<&'static RingBuffer>) + .write(Some(profiling)); +} + +static ACK: AtomicU32 = AtomicU32::new(0); + +pub fn ready_for_profiling() { + ACK.fetch_add(1, Ordering::Relaxed); +} + +pub fn maybe_run_profiling_helper_forever(cpu_id: LogicalCpuId) { + if cpu_id != PROFILER_CPU { + return; + } + unsafe { + for i in 33..255 { + crate::idt::IDTS + .write() + .as_mut() + .unwrap() + .get_mut(&cpu_id) + .unwrap() + .entries[i] + .set_func(crate::interrupt::ipi::wakeup); + } + + let apic = &mut crate::device::local_apic::the_local_apic(); + apic.set_lvt_timer((0b01 << 17) | 32); + apic.set_div_conf(0b1011); + apic.set_init_count(0xffff_f); + + while ACK.load(Ordering::Relaxed) < HARDCODED_CPU_COUNT { + core::hint::spin_loop(); + } + assert_eq!(crate::cpu_count(), HARDCODED_CPU_COUNT + 
1); + + interrupt::enable_and_nop(); + loop { + interrupt::halt(); + } + } +} + +pub fn maybe_setup_timer(idt: &mut Idt, cpu_id: LogicalCpuId) { + if cpu_id != PROFILER_CPU { + return; + } + idt.entries[32].set_func(aux_timer); + idt.set_reserved_mut(32, true); +} diff --git a/src/ptrace.rs b/src/ptrace.rs index 3f3390f4..d555a5c0 100644 --- a/src/ptrace.rs +++ b/src/ptrace.rs @@ -1,40 +1,18 @@ +//! The backend of the "proc:" scheme. Most internal breakpoint +//! handling should go here, unless they closely depend on the design +//! of the scheme. + use crate::{ - arch::{ - macros::InterruptStack, - paging::{ - entry::EntryFlags, - mapper::MapperFlushAll, - temporary_page::TemporaryPage, - ActivePageTable, InactivePageTable, Page, PAGE_SIZE, VirtualAddress - } - }, - common::unique::Unique, - context::{self, signal, Context, ContextId, Status}, event, - scheme::proc, - sync::WaitCondition + percpu::PercpuBlock, + scheme::GlobalSchemes, + sync::WaitCondition, + syscall::{data::PtraceEvent, error::*, flag::*, ptrace_event}, }; -use alloc::{ - boxed::Box, - collections::{ - BTreeMap, - VecDeque, - btree_map::Entry - }, - sync::Arc, - vec::Vec -}; -use core::{ - cmp, - sync::atomic::Ordering -}; -use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; -use syscall::{ - data::PtraceEvent, - error::*, - flag::* -}; +use alloc::{collections::VecDeque, sync::Arc}; +use core::cmp; +use spin::Mutex; // ____ _ // / ___| ___ ___ ___(_) ___ _ __ ___ @@ -43,118 +21,132 @@ use syscall::{ // |____/ \___||___/___/_|\___/|_| |_|___/ #[derive(Debug)] -struct Session { - file_id: usize, +pub struct SessionData { + pub(crate) breakpoint: Option, events: VecDeque, - breakpoint: Option, - tracer: Arc + file_id: usize, } +impl SessionData { + fn add_event(&mut self, event: PtraceEvent) { + self.events.push_back(event); -type SessionMap = BTreeMap; + // Notify nonblocking tracers + if self.events.len() == 1 { + // If the list of events was previously empty, alert now + proc_trigger_event(self.file_id, EVENT_READ); + } + } -static SESSIONS: Once> = Once::new(); + /// Override the breakpoint for the specified tracee. Pass `None` to clear + /// breakpoint. 
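/// (Illustrative usage, assuming the `PTRACE_STOP_PRE_SYSCALL` flag from the `syscall`
/// crate: `session.data.lock().set_breakpoint(Some(PTRACE_STOP_PRE_SYSCALL))` arms a
/// stop before the next syscall; `set_breakpoint(None)` disarms any pending breakpoint.)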
+ pub fn set_breakpoint(&mut self, flags: Option) { + self.breakpoint = flags.map(|flags| Breakpoint { + reached: false, + flags, + }); + } -fn init_sessions() -> RwLock { - RwLock::new(BTreeMap::new()) -} -fn sessions() -> RwLockReadGuard<'static, SessionMap> { - SESSIONS.call_once(init_sessions).read() -} -fn sessions_mut() -> RwLockWriteGuard<'static, SessionMap> { - SESSIONS.call_once(init_sessions).write() -} + /// Returns true if the breakpoint is reached, or if there isn't a + /// breakpoint + pub fn is_reached(&self) -> bool { + self.breakpoint.as_ref().map(|b| b.reached).unwrap_or(false) + } -/// Try to create a new session, but fail if one already exists for -/// this process -pub fn try_new_session(pid: ContextId, file_id: usize) -> bool { - let mut sessions = sessions_mut(); + /// Used for getting the flags in fevent + pub fn session_fevent_flags(&self) -> EventFlags { + let mut flags = EventFlags::empty(); - match sessions.entry(pid) { - Entry::Occupied(_) => false, - Entry::Vacant(vacant) => { - vacant.insert(Session { - file_id, - events: VecDeque::new(), - breakpoint: None, - tracer: Arc::new(WaitCondition::new()) - }); - true + if !self.events.is_empty() { + flags |= EVENT_READ; } + + flags } -} -/// Returns true if a session is attached to this process -pub fn is_traced(pid: ContextId) -> bool { - sessions().contains_key(&pid) + /// Poll events, return the amount read. This drains events from the queue. + pub fn recv_events(&mut self, out: &mut [PtraceEvent]) -> usize { + let len = cmp::min(out.len(), self.events.len()); + for (dst, src) in out.iter_mut().zip(self.events.drain(..len)) { + *dst = src; + } + len + } } -/// Used for getting the flags in fevent -pub fn session_fevent_flags(pid: ContextId) -> Option { - let sessions = sessions(); - let session = sessions.get(&pid)?; - let mut flags = 0; - if !session.events.is_empty() { - flags |= EVENT_READ; +#[derive(Debug)] +pub struct Session { + pub data: Mutex, + pub tracee: WaitCondition, + pub tracer: WaitCondition, +} +impl Session { + pub fn current() -> Option> { + PercpuBlock::current() + .ptrace_session + .borrow() + .as_ref()? + .upgrade() } - if session.breakpoint.as_ref().map(|b| b.reached).unwrap_or(true) { - flags |= EVENT_WRITE; + pub fn try_new(file_id: usize) -> Result> { + Arc::try_new(Session { + data: Mutex::new(SessionData { + breakpoint: None, + events: VecDeque::new(), + file_id, + }), + tracee: WaitCondition::new(), + tracer: WaitCondition::new(), + }) + .map_err(|_| Error::new(ENOMEM)) } - Some(flags) } /// Remove the session from the list of open sessions and notify any /// waiting processes -pub fn close_session(pid: ContextId) { - if let Some(session) = sessions_mut().remove(&pid) { - session.tracer.notify(); - if let Some(breakpoint) = session.breakpoint { - breakpoint.tracee.notify(); - } - } +// TODO +pub fn close_session(session: &Session) { + session.tracer.notify(); + session.tracee.notify(); +} + +/// Wake up the tracer to make sure it catches on that the tracee is dead. This +/// is different from `close_session` in that it doesn't actually close the +/// session, and instead waits for the file handle to be closed, where the +/// session will *actually* be closed. This is partly to ensure ENOSRCH is +/// returned rather than ENODEV (which occurs when there's no session - should +/// never really happen). 
+pub fn close_tracee(session: &Session) { + session.tracer.notify(); + + let data = session.data.lock(); + proc_trigger_event(data.file_id, EVENT_READ); } /// Trigger a notification to the event: scheme -fn proc_trigger_event(file_id: usize, flags: usize) { - event::trigger(proc::PROC_SCHEME_ID.load(Ordering::SeqCst), file_id, flags); +fn proc_trigger_event(file_id: usize, flags: EventFlags) { + event::trigger(GlobalSchemes::Proc.scheme_id(), file_id, flags); } /// Dispatch an event to any tracer tracing `self`. This will cause /// the tracer to wake up and poll for events. Returns Some(()) if an /// event was sent. pub fn send_event(event: PtraceEvent) -> Option<()> { - let contexts = context::contexts(); - let context = contexts.current()?; - let context = context.read(); - - let mut sessions = sessions_mut(); - let session = sessions.get_mut(&context.id)?; + let session = Session::current()?; + let mut data = session.data.lock(); + let breakpoint = data.breakpoint.as_ref()?; - session.events.push_back(event); - - // Notify nonblocking tracers - if session.events.len() == 1 { - // If the list of events was previously empty, alert now - proc_trigger_event(session.file_id, EVENT_READ); + if event.cause & breakpoint.flags != event.cause { + return None; } - // Alert blocking tracers + // Add event to queue + data.add_event(event); + // Notify tracer session.tracer.notify(); Some(()) } -/// Poll events, return the amount read -pub fn recv_events(pid: ContextId, out: &mut [PtraceEvent]) -> Option { - let mut sessions = sessions_mut(); - let session = sessions.get_mut(&pid)?; - - let len = cmp::min(out.len(), session.events.len()); - for (dst, src) in out.iter_mut().zip(session.events.drain(..len)) { - *dst = src; - } - Some(len) -} - // ____ _ _ _ // | __ ) _ __ ___ __ _| | ___ __ ___ (_)_ __ | |_ ___ // | _ \| '__/ _ \/ _` | |/ / '_ \ / _ \| | '_ \| __/ __| @@ -162,308 +154,89 @@ pub fn recv_events(pid: ContextId, out: &mut [PtraceEvent]) -> Option { // |____/|_| \___|\__,_|_|\_\ .__/ \___/|_|_| |_|\__|___/ // |_| -#[derive(Debug)] -struct Breakpoint { - tracee: Arc, +#[derive(Debug, Clone, Copy)] +pub(crate) struct Breakpoint { reached: bool, - flags: u8 + pub(crate) flags: PtraceFlags, } -fn inner_cont(pid: ContextId) -> Option { - // Remove the breakpoint to both save space and also make sure any - // yet unreached but obsolete breakpoints don't stop the program. - let mut sessions = sessions_mut(); - let session = sessions.get_mut(&pid)?; - let breakpoint = session.breakpoint.take()?; - - breakpoint.tracee.notify(); - - Some(breakpoint) -} - -/// Continue the process with the specified ID -pub fn cont(pid: ContextId) { - inner_cont(pid); -} - -/// Create a new breakpoint for the specified tracee, optionally with -/// a sysemu flag. Panics if the session is invalid. -pub fn set_breakpoint(pid: ContextId, flags: u8) { - let tracee = inner_cont(pid) - .map(|b| b.tracee) - .unwrap_or_else(|| Arc::new(WaitCondition::new())); - - let mut sessions = sessions_mut(); - let session = sessions.get_mut(&pid).expect("proc (set_breakpoint): invalid session"); - session.breakpoint = Some(Breakpoint { - tracee, - reached: false, - flags - }); -} - -/// Wait for the tracee to stop. If an event occurs, it returns a copy -/// of that. It will still be available for read using recv_event. +/// Wait for the tracee to stop, or return immediately if there's an unread +/// event. 
/// -/// Note: Don't call while holding any locks, this will switch -/// contexts -pub fn wait(pid: ContextId) -> Result> { - let tracer: Arc = { - let sessions = sessions(); - match sessions.get(&pid) { - Some(session) if session.breakpoint.as_ref().map(|b| !b.reached).unwrap_or(true) => { - if let Some(event) = session.events.front() { - return Ok(Some(event.clone())); - } - Arc::clone(&session.tracer) - }, - _ => return Ok(None) +/// Note: Don't call while holding any locks or allocated data, this will +/// switch contexts and may in fact just never terminate. +pub fn wait(session: Arc) -> Result<()> { + loop { + // Lock the data, to make sure we're reading the final value before going + // to sleep. + let data = session.data.lock(); + + // Wake up if a breakpoint is already reached or there's an unread event + if data.breakpoint.as_ref().map(|b| b.reached).unwrap_or(false) || !data.events.is_empty() { + break; } - }; - - while !tracer.wait() {} - { - let sessions = sessions(); - if let Some(session) = sessions.get(&pid) { - if let Some(event) = session.events.front() { - return Ok(Some(event.clone())); - } + // Go to sleep, and drop the lock on our data, which will allow other the + // tracer to wake us up. + if session.tracer.wait(data, "ptrace::wait") { + // We successfully waited, wake up! + break; } } - let contexts = context::contexts(); - let context = contexts.get(pid).ok_or(Error::new(ESRCH))?; - let context = context.read(); - if let Status::Exited(_) = context.status { - return Err(Error::new(ESRCH)); - } - - Ok(None) + Ok(()) } -/// Notify the tracer and await green flag to continue. -/// Note: Don't call while holding any locks, this will switch contexts -pub fn breakpoint_callback(match_flags: u8) -> Option { - // Can't hold any locks when executing wait() - let (tracee, flags) = { - let contexts = context::contexts(); - let context = contexts.current()?; - let context = context.read(); - - let mut sessions = sessions_mut(); - let session = sessions.get_mut(&context.id)?; - let breakpoint = session.breakpoint.as_mut()?; - - // TODO: How should singlesteps interact with syscalls? How - // does Linux handle this? - - if breakpoint.flags & PTRACE_OPERATIONMASK != match_flags & PTRACE_OPERATIONMASK { +/// Notify the tracer and await green flag to continue. If the breakpoint was +/// set and reached, return the flags which the user waited for. Otherwise, +/// None. +/// +/// Note: Don't call while holding any locks or allocated data, this +/// will switch contexts and may in fact just never terminate. +pub fn breakpoint_callback( + match_flags: PtraceFlags, + event: Option, +) -> Option { + loop { + let percpu = PercpuBlock::current(); + + // TODO: Some or all flags? 
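// (`PtraceFlags::contains` requires every bit of `match_flags` to be present; the TODO
// above asks whether matching any single flag should be enough.)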
+ // Only stop if the tracer have asked for this breakpoint + if percpu.ptrace_flags.get().contains(match_flags) { return None; } - // In case no tracer is waiting, make sure the next one gets - // the memo - breakpoint.reached = true; - - session.tracer.notify(); - proc_trigger_event(session.file_id, EVENT_WRITE); - - ( - Arc::clone(&breakpoint.tracee), - breakpoint.flags - ) - }; + let session = percpu.ptrace_session.borrow().as_ref()?.upgrade()?; - while !tracee.wait() {} + let mut data = session.data.lock(); + let breakpoint = data.breakpoint?; // only go to sleep if there's a breakpoint - Some(flags) -} + // In case no tracer is waiting, make sure the next one gets the memo + data.breakpoint + .as_mut() + .expect("already checked that breakpoint isn't None") + .reached = true; -/// Call when a context is closed to alert any tracers -pub fn close_tracee(pid: ContextId) -> Option<()> { - let mut sessions = sessions_mut(); - let session = sessions.get_mut(&pid)?; + // Add event to queue + data.add_event(event.unwrap_or(ptrace_event!(match_flags))); - session.breakpoint = None; - session.tracer.notify(); - Some(()) -} - -// ____ _ _ -// | _ \ ___ __ _(_)___| |_ ___ _ __ ___ -// | |_) / _ \/ _` | / __| __/ _ \ '__/ __| -// | _ < __/ (_| | \__ \ || __/ | \__ \ -// |_| \_\___|\__, |_|___/\__\___|_| |___/ -// |___/ - -pub struct ProcessRegsGuard; - -/// Make all registers available to e.g. the proc: scheme -/// --- -/// For use inside arch-specific code to assign the pointer of the -/// interupt stack to the current process. Meant to reduce the amount -/// of ptrace-related code that has to lie in arch-specific bits. -/// ```rust,ignore -/// let _guard = ptrace::set_process_regs(pointer); -/// ... -/// // (_guard implicitly dropped) -/// ``` -pub fn set_process_regs(pointer: *mut InterruptStack) -> Option { - let contexts = context::contexts(); - let context = contexts.current()?; - let mut context = context.write(); - - let kstack = context.kstack.as_mut()?; - - context.regs = Some((kstack.as_mut_ptr() as usize, Unique::new(pointer))); - Some(ProcessRegsGuard) -} - -impl Drop for ProcessRegsGuard { - fn drop(&mut self) { - fn clear_process_regs() -> Option<()> { - let contexts = context::contexts(); - let context = contexts.current()?; - let mut context = context.write(); + // Wake up sleeping tracer + session.tracer.notify(); - context.regs = None; - Some(()) + if session.tracee.wait(data, "ptrace::breakpoint_callback") { + // We successfully waited, wake up! + break Some(breakpoint.flags); } - clear_process_regs(); } } -/// Return the InterruptStack pointer, but relative to the specified -/// stack instead of the original. -pub unsafe fn rebase_regs_ptr( - regs: Option<(usize, Unique)>, - kstack: Option<&Box<[u8]>> -) -> Option<*const InterruptStack> { - let (old_base, ptr) = regs?; - let new_base = kstack?.as_ptr() as usize; - Some((ptr.as_ptr() as usize - old_base + new_base) as *const _) -} -/// Return the InterruptStack pointer, but relative to the specified -/// stack instead of the original. -pub unsafe fn rebase_regs_ptr_mut( - regs: Option<(usize, Unique)>, - kstack: Option<&mut Box<[u8]>> -) -> Option<*mut InterruptStack> { - let (old_base, ptr) = regs?; - let new_base = kstack?.as_mut_ptr() as usize; - Some((ptr.as_ptr() as usize - old_base + new_base) as *mut _) -} - -/// Return a reference to the InterruptStack struct in memory. 
If the -/// kernel stack has been backed up by a signal handler, this instead -/// returns the struct inside that memory, as that will later be -/// restored and otherwise undo all your changes. See `update(...)` in -/// context/switch.rs. -pub unsafe fn regs_for(context: &Context) -> Option<&InterruptStack> { - let signal_backup_regs = match context.ksig { - None => None, - Some((_, _, ref kstack, signum)) => { - let is_user_handled = { - let actions = context.actions.lock(); - signal::is_user_handled(actions[signum as usize].0.sa_handler) - }; - if is_user_handled { - None - } else { - Some(rebase_regs_ptr(context.regs, kstack.as_ref())?) - } - } - }; - signal_backup_regs - .or_else(|| context.regs.map(|regs| regs.1.as_ptr() as *const _)) - .map(|ptr| &*ptr) -} - -/// Mutable version of `regs_for` -pub unsafe fn regs_for_mut(context: &mut Context) -> Option<&mut InterruptStack> { - let signal_backup_regs = match context.ksig { - None => None, - Some((_, _, ref mut kstack, signum)) => { - let is_user_handled = { - let actions = context.actions.lock(); - signal::is_user_handled(actions[signum as usize].0.sa_handler) - }; - if is_user_handled { - None - } else { - Some(rebase_regs_ptr_mut(context.regs, kstack.as_mut())?) - } - } - }; - signal_backup_regs - .or_else(|| context.regs.map(|regs| regs.1.as_ptr())) - .map(|ptr| &mut *ptr) -} - -// __ __ -// | \/ | ___ _ __ ___ ___ _ __ _ _ -// | |\/| |/ _ \ '_ ` _ \ / _ \| '__| | | | -// | | | | __/ | | | | | (_) | | | |_| | -// |_| |_|\___|_| |_| |_|\___/|_| \__, | -// |___/ - -pub fn with_context_memory(context: &Context, offset: VirtualAddress, len: usize, f: F) -> Result<()> - where F: FnOnce(*mut u8) -> Result<()> -{ - // TODO: Is using USER_TMP_MISC_OFFSET safe? I guess make sure - // it's not too large. - let start = Page::containing_address(VirtualAddress::new(crate::USER_TMP_MISC_OFFSET)); - - let mut active_page_table = unsafe { ActivePageTable::new() }; - let mut target_page_table = unsafe { - InactivePageTable::from_address(context.arch.get_page_table()) - }; - - // Find the physical frames for all pages - let mut frames = Vec::new(); - - let mut result = None; - active_page_table.with(&mut target_page_table, &mut TemporaryPage::new(start), |mapper| { - let mut inner = || -> Result<()> { - let start = Page::containing_address(offset); - let end = Page::containing_address(VirtualAddress::new(offset.get() + len - 1)); - for page in Page::range_inclusive(start, end) { - frames.push(( - mapper.translate_page(page).ok_or(Error::new(EFAULT))?, - mapper.translate_page_flags(page).ok_or(Error::new(EFAULT))? - )); - } - Ok(()) - }; - result = Some(inner()); - }); - result.expect("with(...) callback should always be called")?; - - // Map all the physical frames into linear pages - let pages = frames.len(); - let mut page = start; - let mut flusher = MapperFlushAll::new(); - for (frame, mut flags) in frames { - flags |= EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE; - flusher.consume(active_page_table.map_to(page, frame, flags)); - - page = page.next(); - } - - flusher.flush(&mut active_page_table); - - let res = f((start.start_address().get() + offset.get() % PAGE_SIZE) as *mut u8); - - // Unmap all the pages (but allow no deallocation!) - let mut page = start; - let mut flusher = MapperFlushAll::new(); - for _ in 0..pages { - flusher.consume(active_page_table.unmap_return(page, true).0); - page = page.next(); - } - - flusher.flush(&mut active_page_table); +/// Obtain the next breakpoint flags for the current process. 
This is used for +/// detecting whether or not the tracer decided to use sysemu mode. +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +pub fn next_breakpoint() -> Option { + let session = Session::current()?; + let data = session.data.lock(); + let breakpoint = data.breakpoint?; - res + Some(breakpoint.flags) } diff --git a/src/scheme/acpi.rs b/src/scheme/acpi.rs new file mode 100644 index 00000000..fc020776 --- /dev/null +++ b/src/scheme/acpi.rs @@ -0,0 +1,304 @@ +use core::{ + convert::TryInto, + str, + sync::atomic::{self, AtomicUsize}, +}; + +use alloc::{boxed::Box, collections::BTreeMap}; + +use spin::{Mutex, Once, RwLock}; +use syscall::{ + dirent::{DirEntry, DirentBuf, DirentKind}, + EIO, +}; + +use crate::{ + acpi::{RxsdtEnum, RXSDT_ENUM}, + context::file::InternalFlags, + event, + sync::WaitCondition, +}; + +use crate::syscall::{ + data::Stat, + error::{Error, Result, EACCES, EBADF, EBADFD, EINTR, EINVAL, EISDIR, ENOENT, ENOTDIR, EROFS}, + flag::{ + EventFlags, EVENT_READ, MODE_CHR, MODE_DIR, MODE_FILE, O_ACCMODE, O_CREAT, O_DIRECTORY, + O_EXCL, O_RDONLY, O_STAT, O_SYMLINK, + }, + usercopy::UserSliceWo, +}; + +use super::{CallerCtx, GlobalSchemes, KernelScheme, OpenResult}; + +/// A scheme used to access the RSDT or XSDT, which is needed for e.g. `acpid` to function. +pub struct AcpiScheme; + +struct Handle { + kind: HandleKind, + stat: bool, +} +#[derive(Eq, PartialEq)] +enum HandleKind { + TopLevel, + Rxsdt, + ShutdownPipe, +} + +// Using BTreeMap as hashbrown doesn't have a const constructor. +static HANDLES: RwLock> = RwLock::new(BTreeMap::new()); +static NEXT_FD: AtomicUsize = AtomicUsize::new(0); + +static DATA: Once> = Once::new(); + +static KSTOP_WAITCOND: WaitCondition = WaitCondition::new(); +static KSTOP_FLAG: Mutex = Mutex::new(false); + +pub fn register_kstop() -> bool { + *KSTOP_FLAG.lock() = true; + let mut waiters_awoken = KSTOP_WAITCOND.notify(); + + let handles = HANDLES.read(); + + for (&fd, _) in handles + .iter() + .filter(|(_, handle)| handle.kind == HandleKind::ShutdownPipe) + { + event::trigger(GlobalSchemes::Acpi.scheme_id(), fd, EVENT_READ); + waiters_awoken += 1; + } + + if waiters_awoken == 0 { + log::error!("No userspace ACPI handler was notified when trying to shutdown. This is bad."); + // Let the kernel shutdown without ACPI. + return false; + } + + // TODO: Context switch directly to the waiting context, to avoid annoying timeouts. + true +} + +impl AcpiScheme { + pub fn init() { + // NOTE: This __must__ be called from the main kernel context, while initializing all + // schemes. If it is called by any other context, then all ACPI data will probably not even + // be mapped. 
+ + let mut data_init = false; + + DATA.call_once(|| { + data_init = true; + + let table = match RXSDT_ENUM.get() { + Some(RxsdtEnum::Rsdt(rsdt)) => rsdt.as_slice(), + Some(RxsdtEnum::Xsdt(xsdt)) => xsdt.as_slice(), + None => { + log::warn!("expected RXSDT_ENUM to be initialized before AcpiScheme, is ACPI available?"); + &[] + } + }; + + Box::from(table) + }); + + if !data_init { + log::error!("AcpiScheme::init called multiple times"); + } + } +} + +impl KernelScheme for AcpiScheme { + fn kopen(&self, path: &str, flags: usize, ctx: CallerCtx) -> Result { + let path = path.trim_start_matches('/'); + + if ctx.uid != 0 { + return Err(Error::new(EACCES)); + } + if flags & O_CREAT == O_CREAT { + return Err(Error::new(EROFS)); + } + if flags & O_EXCL == O_EXCL || flags & O_SYMLINK == O_SYMLINK { + return Err(Error::new(EINVAL)); + } + if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT { + return Err(Error::new(EROFS)); + } + let (handle_kind, int_flags) = match path { + "" => { + if flags & O_DIRECTORY != O_DIRECTORY && flags & O_STAT != O_STAT { + return Err(Error::new(EISDIR)); + } + + (HandleKind::TopLevel, InternalFlags::POSITIONED) + } + "rxsdt" => { + if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT { + return Err(Error::new(ENOTDIR)); + } + (HandleKind::Rxsdt, InternalFlags::POSITIONED) + } + "kstop" => { + if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT { + return Err(Error::new(ENOTDIR)); + } + (HandleKind::ShutdownPipe, InternalFlags::empty()) + } + _ => return Err(Error::new(ENOENT)), + }; + + let fd = NEXT_FD.fetch_add(1, atomic::Ordering::Relaxed); + let mut handles_guard = HANDLES.write(); + + let _ = handles_guard.insert( + fd, + Handle { + kind: handle_kind, + // TODO: Redundant + stat: flags & O_STAT == O_STAT, + }, + ); + + Ok(OpenResult::SchemeLocal(fd, int_flags)) + } + fn fsize(&self, id: usize) -> Result { + let mut handles = HANDLES.write(); + let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; + + if handle.stat { + return Err(Error::new(EBADF)); + } + + Ok(match handle.kind { + HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?.len() as u64, + HandleKind::ShutdownPipe => 1, + HandleKind::TopLevel => 0, + }) + } + // TODO + fn fevent(&self, id: usize, _flags: EventFlags) -> Result { + let handles = HANDLES.read(); + let handle = handles.get(&id).ok_or(Error::new(EBADF))?; + + if handle.stat { + return Err(Error::new(EBADF)); + } + + Ok(EventFlags::empty()) + } + fn close(&self, id: usize) -> Result<()> { + if HANDLES.write().remove(&id).is_none() { + return Err(Error::new(EBADF)); + } + Ok(()) + } + fn kreadoff( + &self, + id: usize, + dst_buf: UserSliceWo, + offset: u64, + _flags: u32, + _stored_flags: u32, + ) -> Result { + let Ok(offset) = usize::try_from(offset) else { + return Ok(0); + }; + + let mut handles = HANDLES.write(); + let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; + + if handle.stat { + return Err(Error::new(EBADF)); + } + + let data = match handle.kind { + HandleKind::ShutdownPipe => { + if dst_buf.is_empty() { + return Ok(0); + } + + loop { + let flag_guard = KSTOP_FLAG.lock(); + + if *flag_guard { + break; + } else if !KSTOP_WAITCOND.wait(flag_guard, "waiting for kstop") { + return Err(Error::new(EINTR)); + } + } + + return dst_buf.copy_exactly(&[0x42]).map(|()| 1); + } + HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?, + HandleKind::TopLevel => return Err(Error::new(EISDIR)), + }; + + let src_offset = core::cmp::min(offset, data.len()); + let src_buf = data + 
.get(src_offset..) + .expect("expected data to be at least data.len() bytes long"); + + dst_buf.copy_common_bytes_from_slice(src_buf) + } + fn getdents( + &self, + id: usize, + buf: UserSliceWo, + header_size: u16, + opaque: u64, + ) -> Result { + let Some(Handle { + kind: HandleKind::TopLevel, + .. + }) = HANDLES.read().get(&id) + else { + return Err(Error::new(ENOTDIR)); + }; + + let mut buf = DirentBuf::new(buf, header_size).ok_or(Error::new(EIO))?; + if opaque == 0 { + buf.entry(DirEntry { + kind: DirentKind::Regular, + name: "rxsdt", + inode: 0, + next_opaque_id: 1, + })?; + } + if opaque <= 1 { + buf.entry(DirEntry { + kind: DirentKind::Socket, + name: "kstop", + inode: 0, + next_opaque_id: u64::MAX, + })?; + } + Ok(buf.finalize()) + } + fn kfstat(&self, id: usize, buf: UserSliceWo) -> Result<()> { + let handles = HANDLES.read(); + let handle = handles.get(&id).ok_or(Error::new(EBADF))?; + + buf.copy_exactly(&match handle.kind { + HandleKind::Rxsdt => { + let data = DATA.get().ok_or(Error::new(EBADFD))?; + + Stat { + st_mode: MODE_FILE, + st_size: data.len().try_into().unwrap_or(u64::max_value()), + ..Default::default() + } + } + HandleKind::TopLevel => Stat { + st_mode: MODE_DIR, + st_size: 0, + ..Default::default() + }, + HandleKind::ShutdownPipe => Stat { + st_mode: MODE_CHR, + st_size: 1, + ..Default::default() + }, + })?; + + Ok(()) + } +} diff --git a/src/scheme/debug.rs b/src/scheme/debug.rs index 4ec00e5a..b5f965c4 100644 --- a/src/scheme/debug.rs +++ b/src/scheme/debug.rs @@ -1,148 +1,210 @@ use core::sync::atomic::{AtomicUsize, Ordering}; -use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; - -use crate::arch::debug::Writer; -use crate::event; -use crate::scheme::*; -use crate::sync::WaitQueue; -use crate::syscall::flag::{EVENT_READ, F_GETFL, F_SETFL, O_ACCMODE, O_NONBLOCK}; -use crate::syscall::scheme::Scheme; - -pub static DEBUG_SCHEME_ID: AtomicSchemeId = ATOMIC_SCHEMEID_INIT; - -/// Input queue -static INPUT: Once> = Once::new(); - -/// Initialize input queue, called if needed -fn init_input() -> WaitQueue { - WaitQueue::new() -} +use spin::RwLock; + +use crate::{ + arch::debug::Writer, + devices::graphical_debug, + event, + scheme::*, + sync::WaitQueue, + syscall::{ + flag::{EventFlags, EVENT_READ, O_NONBLOCK}, + usercopy::{UserSliceRo, UserSliceWo}, + }, +}; static NEXT_ID: AtomicUsize = AtomicUsize::new(0); -static HANDLES: Once>> = Once::new(); +/// Input queue +static INPUT: WaitQueue = WaitQueue::new(); -fn init_handles() -> RwLock> { - RwLock::new(BTreeMap::new()) +#[derive(Clone, Copy)] +struct Handle { + num: usize, } -fn handles() -> RwLockReadGuard<'static, BTreeMap> { - HANDLES.call_once(init_handles).read() -} +// Using BTreeMap as hashbrown doesn't have a const constructor. 
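+// (A const constructor is needed here because HANDLES is a plain `static`, which must be
+// initialized at compile time rather than behind a lazy wrapper such as `Once`.)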
+static HANDLES: RwLock> = RwLock::new(BTreeMap::new()); -fn handles_mut() -> RwLockWriteGuard<'static, BTreeMap> { - HANDLES.call_once(init_handles).write() +/// Add to the input queue +pub fn debug_input(data: u8) { + INPUT.send(data); } -/// Add to the input queue -pub fn debug_input(b: u8) { - INPUT.call_once(init_input).send(b); - for (id, _flags) in handles().iter() { - event::trigger(DEBUG_SCHEME_ID.load(Ordering::SeqCst), *id, EVENT_READ); +// Notify readers of input updates +pub fn debug_notify() { + for (id, _handle) in HANDLES.read().iter() { + event::trigger(GlobalSchemes::Debug.scheme_id(), *id, EVENT_READ); } } pub struct DebugScheme; -impl DebugScheme { - pub fn new(scheme_id: SchemeId) -> DebugScheme { - DEBUG_SCHEME_ID.store(scheme_id, Ordering::SeqCst); - DebugScheme - } +#[repr(usize)] +enum SpecialFds { + Default = !0, + NoPreserve = !0 - 1, + DisableGraphicalDebug = !0 - 2, + + #[cfg(feature = "profiling")] + CtlProfiling = !0 - 3, } -impl Scheme for DebugScheme { - fn open(&self, _path: &[u8], flags: usize, _uid: u32, _gid: u32) -> Result { - let id = NEXT_ID.fetch_add(1, Ordering::SeqCst); - handles_mut().insert(id, flags & ! O_ACCMODE); +impl KernelScheme for DebugScheme { + fn kopen(&self, path: &str, _flags: usize, ctx: CallerCtx) -> Result { + if ctx.uid != 0 { + return Err(Error::new(EPERM)); + } - Ok(id) - } + let num = match path { + "" => SpecialFds::Default as usize, - /// Read the file `number` into the `buffer` - /// - /// Returns the number of bytes read - fn read(&self, id: usize, buf: &mut [u8]) -> Result { - let flags = { - let handles = handles(); - *handles.get(&id).ok_or(Error::new(EBADF))? + "no-preserve" => SpecialFds::NoPreserve as usize, + + "disable-graphical-debug" => SpecialFds::DisableGraphicalDebug as usize, + + #[cfg(feature = "profiling")] + p if p.starts_with("profiling-") => { + path[10..].parse().map_err(|_| Error::new(ENOENT))? + } + + #[cfg(feature = "profiling")] + "ctl-profiling" => SpecialFds::CtlProfiling as usize, + + _ => return Err(Error::new(ENOENT)), }; - INPUT.call_once(init_input) - .receive_into(buf, flags & O_NONBLOCK != O_NONBLOCK) - .ok_or(Error::new(EINTR)) + let id = NEXT_ID.fetch_add(1, Ordering::Relaxed); + HANDLES.write().insert(id, Handle { num }); + + Ok(OpenResult::SchemeLocal(id, InternalFlags::empty())) } - /// Write the `buffer` to the `file` - /// - /// Returns the number of bytes written - fn write(&self, id: usize, buf: &[u8]) -> Result { - let _flags = { - let handles = handles(); + fn fevent(&self, id: usize, _flags: EventFlags) -> Result { + let _handle = { + let handles = HANDLES.read(); *handles.get(&id).ok_or(Error::new(EBADF))? }; - Writer::new().write(buf); - Ok(buf.len()) - } - - fn fcntl(&self, id: usize, cmd: usize, arg: usize) -> Result { - let mut handles = handles_mut(); - if let Some(flags) = handles.get_mut(&id) { - match cmd { - F_GETFL => Ok(*flags), - F_SETFL => { - *flags = arg & ! O_ACCMODE; - Ok(0) - }, - _ => Err(Error::new(EINVAL)) - } - } else { - Err(Error::new(EBADF)) - } + Ok(EventFlags::empty()) } - fn fevent(&self, id: usize, _flags: usize) -> Result { - let _flags = { - let handles = handles(); + fn fsync(&self, id: usize) -> Result<()> { + let _handle = { + let handles = HANDLES.read(); *handles.get(&id).ok_or(Error::new(EBADF))? 
}; - Ok(0) + Ok(()) } - fn fpath(&self, id: usize, buf: &mut [u8]) -> Result { - let _flags = { - let handles = handles(); + fn close(&self, id: usize) -> Result<()> { + let _handle = { + let mut handles = HANDLES.write(); + handles.remove(&id).ok_or(Error::new(EBADF))? + }; + + Ok(()) + } + fn kread(&self, id: usize, buf: UserSliceWo, flags: u32, _stored_flags: u32) -> Result { + let handle = { + let handles = HANDLES.read(); *handles.get(&id).ok_or(Error::new(EBADF))? }; - let mut i = 0; - let scheme_path = b"debug:"; - while i < buf.len() && i < scheme_path.len() { - buf[i] = scheme_path[i]; - i += 1; + if handle.num == SpecialFds::DisableGraphicalDebug as usize { + return Err(Error::new(EBADF)); + } + + #[cfg(feature = "profiling")] + if handle.num == SpecialFds::CtlProfiling as usize { + return Err(Error::new(EBADF)); + } + + #[cfg(feature = "profiling")] + if handle.num != SpecialFds::Default as usize { + return crate::profiling::drain_buffer( + crate::cpu_set::LogicalCpuId::new(handle.num as u32), + buf, + ); } - Ok(i) + INPUT.receive_into_user(buf, flags & O_NONBLOCK as u32 == 0, "DebugScheme::read") } - fn fsync(&self, id: usize) -> Result { - let _flags = { - let handles = handles(); + fn kwrite( + &self, + id: usize, + buf: UserSliceRo, + _flags: u32, + _stored_flags: u32, + ) -> Result { + let handle = { + let handles = HANDLES.read(); *handles.get(&id).ok_or(Error::new(EBADF))? }; - Ok(0) - } + #[cfg(feature = "profiling")] + if handle.num == SpecialFds::CtlProfiling as usize { + let mut dst = [0]; + buf.copy_to_slice(&mut dst)?; - /// Close the file `number` - fn close(&self, id: usize) -> Result { - let _flags = { - let mut handles = handles_mut(); - handles.remove(&id).ok_or(Error::new(EBADF))? + let is_profiling = match dst[0] { + b'0' => false, + b'1' => true, + _ => return Err(Error::new(EINVAL)), + }; + log::info!("Wrote {is_profiling} to IS_PROFILING"); + crate::profiling::IS_PROFILING.store(is_profiling, Ordering::Relaxed); + + return Ok(1); + } + + if handle.num == SpecialFds::DisableGraphicalDebug as usize { + #[cfg(feature = "graphical_debug")] + graphical_debug::fini(); + + return Ok(0); + } + + if handle.num != SpecialFds::Default as usize + && handle.num != SpecialFds::NoPreserve as usize + { + return Err(Error::new(EINVAL)); + } + + let mut tmp = [0_u8; 512]; + + for chunk in buf.in_variable_chunks(tmp.len()) { + let byte_count = chunk.copy_common_bytes_to_slice(&mut tmp)?; + let tmp_bytes = &tmp[..byte_count]; + + // The reason why a new writer is created for each iteration, is because the page fault + // handler in usercopy might use the same lock when printing for debug purposes, and + // although it most likely won't, it would be dangerous to rely on that assumption. + Writer::new().write(tmp_bytes, handle.num != SpecialFds::NoPreserve as usize); + } + + Ok(buf.len()) + } + fn kfpath(&self, id: usize, buf: UserSliceWo) -> Result { + let handle = { + let handles = HANDLES.read(); + *handles.get(&id).ok_or(Error::new(EBADF))? }; + if handle.num != SpecialFds::Default as usize + && handle.num != SpecialFds::NoPreserve as usize + { + return Err(Error::new(EINVAL)); + } + + // TODO: Copy elsewhere in the kernel? 
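+        // fpath for this scheme is just the scheme prefix: copy as much of `SRC` as fits
+        // into the caller's buffer and report how many bytes were actually written.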
+ const SRC: &[u8] = b"debug:"; + let byte_count = core::cmp::min(buf.len(), SRC.len()); + buf.limit(byte_count) + .expect("must succeed") + .copy_from_slice(&SRC[..byte_count])?; - Ok(0) + Ok(byte_count) } } diff --git a/src/scheme/dtb.rs b/src/scheme/dtb.rs new file mode 100644 index 00000000..bf44b096 --- /dev/null +++ b/src/scheme/dtb.rs @@ -0,0 +1,144 @@ +use core::sync::atomic::{self, AtomicUsize}; + +use alloc::{boxed::Box, collections::BTreeMap}; +use spin::{Once, RwLock}; + +use super::{CallerCtx, KernelScheme, OpenResult}; +use crate::{ + dtb::DTB_BINARY, + scheme::InternalFlags, + syscall::{ + data::Stat, + error::*, + flag::{MODE_FILE, O_STAT}, + usercopy::UserSliceWo, + }, +}; + +pub struct DtbScheme; + +#[derive(Eq, PartialEq)] +enum HandleKind { + RawData, +} + +struct Handle { + kind: HandleKind, + stat: bool, +} + +static HANDLES: RwLock> = RwLock::new(BTreeMap::new()); +static NEXT_FD: AtomicUsize = AtomicUsize::new(0); +static DATA: Once> = Once::new(); + +impl DtbScheme { + pub fn init() { + let mut data_init = false; + + DATA.call_once(|| { + data_init = true; + + let dtb = match DTB_BINARY.get() { + Some(dtb) => dtb.as_slice(), + None => &[], + }; + + Box::from(dtb) + }); + + if !data_init { + log::error!("DtbScheme::new called multiple times"); + } + } +} + +impl KernelScheme for DtbScheme { + fn kopen(&self, path: &str, _flags: usize, _ctx: CallerCtx) -> Result { + let path = path.trim_matches('/'); + + if path.is_empty() { + let id = NEXT_FD.fetch_add(1, atomic::Ordering::Relaxed); + + let mut handles_guard = HANDLES.write(); + + let _ = handles_guard.insert( + id, + Handle { + kind: HandleKind::RawData, + stat: _flags & O_STAT == O_STAT, + }, + ); + return Ok(OpenResult::SchemeLocal(id, InternalFlags::POSITIONED)); + } + + Err(Error::new(ENOENT)) + } + + fn fsize(&self, id: usize) -> Result { + let mut handles = HANDLES.write(); + let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; + + if handle.stat { + return Err(Error::new(EBADF)); + } + + let file_len = match handle.kind { + HandleKind::RawData => DATA.get().ok_or(Error::new(EBADFD))?.len(), + }; + + Ok(file_len as u64) + } + + fn close(&self, id: usize) -> Result<()> { + if HANDLES.write().remove(&id).is_none() { + return Err(Error::new(EBADF)); + } + Ok(()) + } + + fn kreadoff( + &self, + id: usize, + dst_buf: UserSliceWo, + offset: u64, + _flags: u32, + _stored_flags: u32, + ) -> Result { + let mut handles = HANDLES.write(); + let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; + + if handle.stat { + return Err(Error::new(EBADF)); + } + + let data = match handle.kind { + HandleKind::RawData => DATA.get().ok_or(Error::new(EBADFD))?, + }; + + let src_offset = core::cmp::min(offset.try_into().unwrap(), data.len()); + let src_buf = data + .get(src_offset..) 
+ .expect("expected data to be at least data.len() bytes long"); + + dst_buf.copy_common_bytes_from_slice(src_buf) + } + + fn kfstat(&self, id: usize, buf: UserSliceWo) -> Result<()> { + let handles = HANDLES.read(); + let handle = handles.get(&id).ok_or(Error::new(EBADF))?; + buf.copy_exactly(&match handle.kind { + HandleKind::RawData => { + let data = DATA.get().ok_or(Error::new(EBADFD))?; + Stat { + st_mode: MODE_FILE, + st_uid: 0, + st_gid: 0, + st_size: data.len().try_into().unwrap_or(u64::max_value()), + ..Default::default() + } + } + })?; + + Ok(()) + } +} diff --git a/src/scheme/event.rs b/src/scheme/event.rs index e7ea1ffc..1ebdcb4a 100644 --- a/src/scheme/event.rs +++ b/src/scheme/event.rs @@ -1,22 +1,37 @@ use alloc::sync::Arc; -use core::{mem, slice}; +use core::mem; +use syscall::O_NONBLOCK; -use crate::event::{EventQueue, EventQueueId, next_queue_id, queues, queues_mut}; -use crate::syscall::data::Event; -use crate::syscall::error::*; -use crate::syscall::scheme::Scheme; +use crate::{ + context::file::InternalFlags, + event::{next_queue_id, queues, queues_mut, EventQueue, EventQueueId}, + syscall::{ + data::Event, + error::*, + usercopy::{UserSliceRo, UserSliceWo}, + }, +}; + +use super::{CallerCtx, KernelScheme, OpenResult}; pub struct EventScheme; -impl Scheme for EventScheme { - fn open(&self, _path: &[u8], _flags: usize, _uid: u32, _gid: u32) -> Result { +impl KernelScheme for EventScheme { + fn kopen(&self, _path: &str, _flags: usize, _ctx: CallerCtx) -> Result { let id = next_queue_id(); queues_mut().insert(id, Arc::new(EventQueue::new(id))); - Ok(id.into()) + Ok(OpenResult::SchemeLocal(id.get(), InternalFlags::empty())) } - fn read(&self, id: usize, buf: &mut [u8]) -> Result { + fn close(&self, id: usize) -> Result<()> { + let id = EventQueueId::from(id); + queues_mut() + .remove(&id) + .ok_or(Error::new(EBADF)) + .and(Ok(())) + } + fn kread(&self, id: usize, buf: UserSliceWo, flags: u32, _stored_flags: u32) -> Result { let id = EventQueueId::from(id); let queue = { @@ -25,11 +40,16 @@ impl Scheme for EventScheme { handle.clone() }; - let event_buf = unsafe { slice::from_raw_parts_mut(buf.as_mut_ptr() as *mut Event, buf.len()/mem::size_of::()) }; - Ok(queue.read(event_buf)? * mem::size_of::()) + queue.read(buf, flags & O_NONBLOCK as u32 == 0) } - fn write(&self, id: usize, buf: &[u8]) -> Result { + fn kwrite( + &self, + id: usize, + buf: UserSliceRo, + _flags: u32, + _stored_flags: u32, + ) -> Result { let id = EventQueueId::from(id); let queue = { @@ -37,37 +57,20 @@ impl Scheme for EventScheme { let handle = handles.get(&id).ok_or(Error::new(EBADF))?; handle.clone() }; - - let event_buf = unsafe { slice::from_raw_parts(buf.as_ptr() as *const Event, buf.len()/mem::size_of::()) }; - Ok(queue.write(event_buf)? * mem::size_of::()) - } - - fn fcntl(&self, id: usize, _cmd: usize, _arg: usize) -> Result { - let id = EventQueueId::from(id); - - let handles = queues(); - handles.get(&id).ok_or(Error::new(EBADF)).and(Ok(0)) - } - - fn fpath(&self, _id: usize, buf: &mut [u8]) -> Result { - let mut i = 0; - let scheme_path = b"event:"; - while i < buf.len() && i < scheme_path.len() { - buf[i] = scheme_path[i]; - i += 1; + let mut events_written = 0; + + for chunk in buf.in_exact_chunks(mem::size_of::()) { + let event = unsafe { chunk.read_exact::()? }; + if queue.write(&[event])? 
== 0 { + break; + } + events_written += 1; } - Ok(i) - } - fn fsync(&self, id: usize) -> Result { - let id = EventQueueId::from(id); - - let handles = queues(); - handles.get(&id).ok_or(Error::new(EBADF)).and(Ok(0)) + Ok(events_written * mem::size_of::()) } - fn close(&self, id: usize) -> Result { - let id = EventQueueId::from(id); - queues_mut().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) + fn kfpath(&self, _id: usize, buf: UserSliceWo) -> Result { + buf.copy_common_bytes_from_slice(b"event:") } } diff --git a/src/scheme/initfs.rs b/src/scheme/initfs.rs deleted file mode 100644 index 5af8b3bc..00000000 --- a/src/scheme/initfs.rs +++ /dev/null @@ -1,144 +0,0 @@ -use alloc::collections::BTreeMap; -use core::{cmp, str}; -use core::sync::atomic::{AtomicUsize, Ordering}; -use spin::RwLock; - -use crate::syscall::data::Stat; -use crate::syscall::error::*; -use crate::syscall::flag::{MODE_DIR, MODE_FILE, SEEK_SET, SEEK_CUR, SEEK_END}; -use crate::syscall::scheme::Scheme; - -#[cfg(test)] -mod gen { - use alloc::collections::BTreeMap; - pub fn gen() -> BTreeMap<&'static [u8], (&'static [u8], bool)> { BTreeMap::new() } -} - -#[cfg(not(test))] -include!(concat!(env!("OUT_DIR"), "/gen.rs")); - -struct Handle { - path: &'static [u8], - data: &'static [u8], - mode: u16, - seek: usize -} - -pub struct InitFsScheme { - next_id: AtomicUsize, - files: BTreeMap<&'static [u8], (&'static [u8], bool)>, - handles: RwLock> -} - -impl InitFsScheme { - pub fn new() -> InitFsScheme { - InitFsScheme { - next_id: AtomicUsize::new(0), - files: gen::gen(), - handles: RwLock::new(BTreeMap::new()) - } - } -} - -impl Scheme for InitFsScheme { - fn open(&self, path: &[u8], _flags: usize, _uid: u32, _gid: u32) -> Result { - let path_utf8 = str::from_utf8(path).or(Err(Error::new(ENOENT)))?; - let path_trimmed = path_utf8.trim_matches('/'); - - //Have to iterate to get the path without allocation - for entry in self.files.iter() { - if entry.0 == &path_trimmed.as_bytes() { - let id = self.next_id.fetch_add(1, Ordering::SeqCst); - self.handles.write().insert(id, Handle { - path: entry.0, - data: (entry.1).0, - mode: if (entry.1).1 { MODE_DIR | 0o755 } else { MODE_FILE | 0o744 }, - seek: 0 - }); - - return Ok(id); - } - } - - Err(Error::new(ENOENT)) - } - - fn read(&self, id: usize, buffer: &mut [u8]) -> Result { - let mut handles = self.handles.write(); - let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; - - let mut i = 0; - while i < buffer.len() && handle.seek < handle.data.len() { - buffer[i] = handle.data[handle.seek]; - i += 1; - handle.seek += 1; - } - - Ok(i) - } - - fn seek(&self, id: usize, pos: usize, whence: usize) -> Result { - let mut handles = self.handles.write(); - let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; - - handle.seek = match whence { - SEEK_SET => cmp::min(handle.data.len(), pos), - SEEK_CUR => cmp::max(0, cmp::min(handle.data.len() as isize, handle.seek as isize + pos as isize)) as usize, - SEEK_END => cmp::max(0, cmp::min(handle.data.len() as isize, handle.data.len() as isize + pos as isize)) as usize, - _ => return Err(Error::new(EINVAL)) - }; - - Ok(handle.seek) - } - - fn fcntl(&self, id: usize, _cmd: usize, _arg: usize) -> Result { - let handles = self.handles.read(); - let _handle = handles.get(&id).ok_or(Error::new(EBADF))?; - - Ok(0) - } - - fn fpath(&self, id: usize, buf: &mut [u8]) -> Result { - let handles = self.handles.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - - //TODO: Copy scheme part in kernel - let mut i = 0; - let scheme_path = 
b"initfs:"; - while i < buf.len() && i < scheme_path.len() { - buf[i] = scheme_path[i]; - i += 1; - } - - let mut j = 0; - while i < buf.len() && j < handle.path.len() { - buf[i] = handle.path[j]; - i += 1; - j += 1; - } - - Ok(i) - } - - fn fstat(&self, id: usize, stat: &mut Stat) -> Result { - let handles = self.handles.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - - stat.st_mode = handle.mode; - stat.st_uid = 0; - stat.st_gid = 0; - stat.st_size = handle.data.len() as u64; - - Ok(0) - } - - fn fsync(&self, id: usize) -> Result { - let handles = self.handles.read(); - let _handle = handles.get(&id).ok_or(Error::new(EBADF))?; - Ok(0) - } - - fn close(&self, id: usize) -> Result { - self.handles.write().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) - } -} diff --git a/src/scheme/irq.rs b/src/scheme/irq.rs index eb51768b..deeeedb7 100644 --- a/src/scheme/irq.rs +++ b/src/scheme/irq.rs @@ -1,111 +1,526 @@ -use core::{mem, str}; -use core::sync::atomic::Ordering; -use spin::Mutex; +// TODO: Rewrite this entire scheme. Legacy x86 APIs should be abstracted by a userspace scheme, +// this scheme should only handle raw IRQ registration and delivery to userspace. -use crate::event; -use crate::interrupt::irq::acknowledge; -use crate::scheme::{AtomicSchemeId, ATOMIC_SCHEMEID_INIT, SchemeId}; -use crate::syscall::error::*; -use crate::syscall::flag::EVENT_READ; -use crate::syscall::scheme::Scheme; +use core::{ + mem, str, + str::FromStr, + sync::atomic::{AtomicUsize, Ordering}, +}; -pub static IRQ_SCHEME_ID: AtomicSchemeId = ATOMIC_SCHEMEID_INIT; +use alloc::{collections::BTreeMap, string::String, vec::Vec}; +use spin::{Mutex, Once, RwLock}; +use syscall::dirent::{DirEntry, DirentBuf, DirentKind}; + +use crate::context::file::InternalFlags; + +use super::{CallerCtx, GlobalSchemes, OpenResult}; +#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +use crate::arch::interrupt::{available_irqs_iter, irq::acknowledge, is_reserved, set_reserved}; +#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] +use crate::dtb::irqchip::{acknowledge, available_irqs_iter, is_reserved, set_reserved, IRQ_CHIP}; +use crate::{ + cpu_set::LogicalCpuId, + event, + syscall::{ + data::Stat, + error::*, + flag::{EventFlags, EVENT_READ, MODE_CHR, MODE_DIR, O_CREAT, O_DIRECTORY, O_STAT}, + usercopy::{UserSliceRo, UserSliceWo}, + }, +}; + +/// /// IRQ queues -static ACKS: Mutex<[usize; 16]> = Mutex::new([0; 16]); -static COUNTS: Mutex<[usize; 16]> = Mutex::new([0; 16]); +pub(super) static COUNTS: Mutex<[usize; 224]> = Mutex::new([0; 224]); +// Using BTreeMap as hashbrown doesn't have a const constructor. +static HANDLES: RwLock> = RwLock::new(BTreeMap::new()); + +/// These are IRQs 0..=15 (corresponding to interrupt vectors 32..=47). They are opened without the +/// O_CREAT flag. +const BASE_IRQ_COUNT: u8 = 16; + +/// These are the extended IRQs, 16..=223 (interrupt vectors 48..=255). Some of them are reserved +/// for other devices, and some other interrupt vectors like 0x80 (software interrupts) and +/// 0x40..=0x43 (IPI). +/// +/// Since these are non-sharable, they must be opened with O_CREAT, which then reserves them. They +/// are only freed when the file descriptor is closed. 
+const TOTAL_IRQ_COUNT: u8 = 224; + +const INO_TOPLEVEL: u64 = 0x8002_0000_0000_0000; +const INO_AVAIL: u64 = 0x8000_0000_0000_0000; +const INO_BSP: u64 = 0x8001_0000_0000_0000; +const INO_PHANDLE: u64 = 0x8003_0000_0000_0000; /// Add to the input queue #[no_mangle] -pub extern fn irq_trigger(irq: u8) { +pub extern "C" fn irq_trigger(irq: u8) { COUNTS.lock()[irq as usize] += 1; - event::trigger(IRQ_SCHEME_ID.load(Ordering::SeqCst), irq as usize, EVENT_READ); + + for (fd, _) in HANDLES + .read() + .iter() + .filter_map(|(fd, handle)| Some((fd, handle.as_irq_handle()?))) + .filter(|&(_, (_, handle_irq))| handle_irq == irq) + { + event::trigger(GlobalSchemes::Irq.scheme_id(), *fd, EVENT_READ); + } } +#[allow(dead_code)] +enum Handle { + Irq { ack: AtomicUsize, irq: u8 }, + Avail(LogicalCpuId), + TopLevel, + Phandle(u8, Vec), + Bsp, +} +impl Handle { + fn as_irq_handle<'a>(&'a self) -> Option<(&'a AtomicUsize, u8)> { + match self { + &Self::Irq { ref ack, irq } => Some((ack, irq)), + _ => None, + } + } +} + +static NEXT_FD: AtomicUsize = AtomicUsize::new(1); +static CPUS: Once> = Once::new(); + pub struct IrqScheme; impl IrqScheme { - pub fn new(scheme_id: SchemeId) -> IrqScheme { - IRQ_SCHEME_ID.store(scheme_id, Ordering::SeqCst); - IrqScheme + pub fn init() { + #[cfg(all(feature = "acpi", any(target_arch = "x86", target_arch = "x86_64")))] + let cpus = { + use crate::acpi::madt::*; + + match madt() { + Some(madt) => madt + .iter() + .filter_map(|entry| match entry { + MadtEntry::LocalApic(apic) => Some(apic.processor), + _ => None, + }) + .collect::>(), + None => { + log::warn!("no MADT found, defaulting to 1 CPU"); + vec![0] + } + } + }; + #[cfg(not(all(feature = "acpi", any(target_arch = "x86", target_arch = "x86_64"))))] + let cpus = vec![0]; + + CPUS.call_once(|| cpus); } + fn open_ext_irq( + flags: usize, + cpu_id: LogicalCpuId, + path_str: &str, + ) -> Result<(Handle, InternalFlags)> { + let irq_number = u8::from_str(path_str).or(Err(Error::new(ENOENT)))?; + + Ok( + if irq_number < BASE_IRQ_COUNT && cpu_id == LogicalCpuId::BSP { + // Give legacy IRQs only to `irq:{0..15}` and `irq:cpu-/{0..15}` (same handles). + // + // The only CPUs don't have the legacy IRQs in their IDTs. 
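+                // (In other words, the `cpu_id == LogicalCpuId::BSP` check above restricts
+                // legacy IRQ handles to the bootstrap processor.)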
+ + ( + Handle::Irq { + ack: AtomicUsize::new(0), + irq: irq_number, + }, + InternalFlags::empty(), + ) + } else if irq_number < TOTAL_IRQ_COUNT { + if flags & O_CREAT == 0 && flags & O_STAT == 0 { + return Err(Error::new(EINVAL)); + } + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if flags & O_STAT == 0 { + if is_reserved(cpu_id, irq_to_vector(irq_number)) { + return Err(Error::new(EEXIST)); + } + set_reserved(cpu_id, irq_to_vector(irq_number), true); + } + ( + Handle::Irq { + ack: AtomicUsize::new(0), + irq: irq_number, + }, + InternalFlags::empty(), + ) + } else { + return Err(Error::new(ENOENT)); + }, + ) + } + + #[cfg(dtb)] + unsafe fn open_phandle_irq( + flags: usize, + phandle: usize, + path_str: &str, + ) -> Result<(Handle, InternalFlags)> { + let addr: Vec = path_str + .split(',') + .map(|x| u32::from_str(x).or(Err(Error::new(ENOENT)))) + .try_collect()?; + let ic_idx = IRQ_CHIP + .phandle_to_ic_idx(phandle as u32) + .ok_or(Error::new(ENOENT))?; + Ok({ + if flags & O_CREAT == 0 && flags & O_STAT == 0 { + return Err(Error::new(EINVAL)); + } + let irq_number = IRQ_CHIP + .irq_xlate(ic_idx, addr.as_slice()) + .or(Err(Error::new(ENOENT)))?; + log::debug!("open_phandle_irq virq={}", irq_number); + if flags & O_STAT == 0 { + if is_reserved(LogicalCpuId::new(0), irq_number as u8) { + return Err(Error::new(EEXIST)); + } + set_reserved(LogicalCpuId::new(0), irq_number as u8, true); + } + ( + Handle::Irq { + ack: AtomicUsize::new(0), + irq: irq_number as u8, + }, + InternalFlags::empty(), + ) + }) + } +} + +const fn irq_to_vector(irq: u8) -> u8 { + irq + 32 +} +const fn vector_to_irq(vector: u8) -> u8 { + vector - 32 } -impl Scheme for IrqScheme { - fn open(&self, path: &[u8], _flags: usize, uid: u32, _gid: u32) -> Result { - if uid == 0 { - let path_str = str::from_utf8(path).or(Err(Error::new(ENOENT)))?; +impl crate::scheme::KernelScheme for IrqScheme { + fn kopen(&self, path: &str, flags: usize, ctx: CallerCtx) -> Result { + if ctx.uid != 0 { + return Err(Error::new(EACCES)); + } - let id = path_str.parse::().or(Err(Error::new(ENOENT)))?; + let path_str = path.trim_start_matches('/'); - if id < COUNTS.lock().len() { - Ok(id) - } else { - Err(Error::new(ENOENT)) + let (handle, int_flags) = if path_str.is_empty() { + if flags & O_DIRECTORY == 0 && flags & O_STAT == 0 { + return Err(Error::new(EISDIR)); + } + // list every logical CPU in the format of e.g. 
`cpu-1b` + + let mut bytes = String::new(); + + use core::fmt::Write; + + writeln!(bytes, "bsp").unwrap(); + + for cpu_id in CPUS.get().expect("IRQ scheme not initialized") { + writeln!(bytes, "cpu-{:02x}", cpu_id).unwrap(); + } + + #[cfg(dtb)] + unsafe { + for chip in &IRQ_CHIP.irq_chip_list.chips { + writeln!(bytes, "phandle-{}", chip.phandle).unwrap(); + } } + + (Handle::TopLevel, InternalFlags::POSITIONED) } else { - Err(Error::new(EACCES)) - } - } + if path_str == "bsp" { + (Handle::Bsp, InternalFlags::empty()) + } else if path_str.starts_with("cpu-") { + let path_str = &path_str[4..]; + let cpu_id = u8::from_str_radix(&path_str[..2], 16).or(Err(Error::new(ENOENT)))?; + let path_str = path_str[2..].trim_end_matches('/'); - fn read(&self, file: usize, buffer: &mut [u8]) -> Result { - // Ensures that the length of the buffer is larger than the size of a usize - if buffer.len() >= mem::size_of::() { - let ack = ACKS.lock()[file]; - let current = COUNTS.lock()[file]; - if ack != current { - // Safe if the length of the buffer is larger than the size of a usize - assert!(buffer.len() >= mem::size_of::()); - unsafe { *(buffer.as_mut_ptr() as *mut usize) = current; } - Ok(mem::size_of::()) + if path_str.is_empty() { + ( + Handle::Avail(LogicalCpuId::new(cpu_id.into())), + InternalFlags::POSITIONED, + ) + } else if path_str.starts_with('/') { + let path_str = &path_str[1..]; + Self::open_ext_irq(flags, LogicalCpuId::new(cpu_id.into()), path_str)? + } else { + return Err(Error::new(ENOENT)); + } + } else if cfg!(dtb) && path_str.starts_with("phandle-") { + #[cfg(dtb)] + unsafe { + let (phandle_str, path_str) = + path_str[8..].split_once('/').unwrap_or((path_str, "")); + let phandle = usize::from_str(phandle_str).or(Err(Error::new(ENOENT)))?; + if path_str.is_empty() { + let has_any = IRQ_CHIP.irq_iter_for(phandle as u32).next().is_some(); + if has_any { + let data = String::new(); + ( + Handle::Phandle(phandle as u8, data.into_bytes()), + InternalFlags::POSITIONED, + ) + } else { + return Err(Error::new(ENOENT)); + } + } else { + Self::open_phandle_irq(flags, phandle, path_str)? + } + } + #[cfg(not(dtb))] + panic!("") + } else if let Ok(plain_irq_number) = u8::from_str(path_str) { + if plain_irq_number < BASE_IRQ_COUNT { + ( + Handle::Irq { + ack: AtomicUsize::new(0), + irq: plain_irq_number, + }, + InternalFlags::empty(), + ) + } else { + return Err(Error::new(ENOENT)); + } } else { - Ok(0) + return Err(Error::new(ENOENT)); } - } else { - Err(Error::new(EINVAL)) - } + }; + let fd = NEXT_FD.fetch_add(1, Ordering::Relaxed); + HANDLES.write().insert(fd, handle); + Ok(OpenResult::SchemeLocal(fd, int_flags)) } + fn getdents( + &self, + id: usize, + buf: UserSliceWo, + header_size: u16, + opaque_id_start: u64, + ) -> Result { + let Ok(opaque) = usize::try_from(opaque_id_start) else { + return Ok(0); + }; - fn write(&self, file: usize, buffer: &[u8]) -> Result { - if buffer.len() >= mem::size_of::() { - assert!(buffer.len() >= mem::size_of::()); - let ack = unsafe { *(buffer.as_ptr() as *const usize) }; - let current = COUNTS.lock()[file]; - if ack == current { - ACKS.lock()[file] = ack; - unsafe { acknowledge(file); } - Ok(mem::size_of::()) - } else { - Ok(0) + use core::fmt::Write; + + let mut buf = DirentBuf::new(buf, header_size).ok_or(Error::new(EIO))?; + let mut intermediate = String::new(); + + match *HANDLES.read().get(&id).ok_or(Error::new(EBADF))? 
{ + Handle::TopLevel => { + let cpus = CPUS.get().expect("IRQ scheme not initialized"); + + if opaque == 0 { + buf.entry(DirEntry { + inode: 0, + next_opaque_id: 1, + kind: DirentKind::CharDev, + name: "bsp", + })?; + } + + // list every logical CPU in the format of e.g. `cpu-1b` + for cpu_id in cpus.iter().filter(|i| opaque <= usize::from(**i)) { + intermediate.clear(); + write!(&mut intermediate, "cpu-{:02x}", cpu_id).unwrap(); + buf.entry(DirEntry { + kind: DirentKind::Directory, + name: &intermediate, + inode: 0, + next_opaque_id: u64::from(*cpu_id + 1), + })?; + } } - } else { - Err(Error::new(EINVAL)) + Handle::Avail(cpu_id) => { + for vector in available_irqs_iter(cpu_id).skip(opaque) { + let irq = vector_to_irq(vector); + if cpu_id == LogicalCpuId::BSP && irq < BASE_IRQ_COUNT { + continue; + } + intermediate.clear(); + write!(intermediate, "{}", irq).unwrap(); + buf.entry(DirEntry { + inode: 0, + kind: DirentKind::CharDev, + name: &intermediate, + next_opaque_id: u64::from(vector) + 1, + })?; + } + } + _ => return Err(Error::new(ENOTDIR)), } + Ok(buf.finalize()) } fn fcntl(&self, _id: usize, _cmd: usize, _arg: usize) -> Result { Ok(0) } - fn fevent(&self, _id: usize, _flags: usize) -> Result { - Ok(0) + fn fevent(&self, _id: usize, _flags: EventFlags) -> Result { + Ok(EventFlags::empty()) } - fn fpath(&self, id: usize, buf: &mut [u8]) -> Result { - let mut i = 0; - let scheme_path = format!("irq:{}", id).into_bytes(); - while i < buf.len() && i < scheme_path.len() { - buf[i] = scheme_path[i]; - i += 1; + fn fsync(&self, _file: usize) -> Result<()> { + Ok(()) + } + + fn close(&self, id: usize) -> Result<()> { + let handles_guard = HANDLES.read(); + let handle = handles_guard.get(&id).ok_or(Error::new(EBADF))?; + + if let &Handle::Irq { + irq: handle_irq, .. + } = handle + { + if handle_irq > BASE_IRQ_COUNT { + set_reserved(LogicalCpuId::BSP, irq_to_vector(handle_irq), false); + } } - Ok(i) + Ok(()) } + fn kwrite( + &self, + file: usize, + buffer: UserSliceRo, + _flags: u32, + _stored_flags: u32, + ) -> Result { + let handles_guard = HANDLES.read(); + let handle = handles_guard.get(&file).ok_or(Error::new(EBADF))?; - fn fsync(&self, _file: usize) -> Result { - Ok(0) + match handle { + &Handle::Irq { + irq: handle_irq, + ack: ref handle_ack, + } => { + if buffer.len() < mem::size_of::() { + return Err(Error::new(EINVAL)); + } + let ack = buffer.read_usize()?; + let current = COUNTS.lock()[handle_irq as usize]; + + if ack != current { + return Ok(0); + } + handle_ack.store(ack, Ordering::SeqCst); + unsafe { + acknowledge(handle_irq as usize); + } + Ok(mem::size_of::()) + } + _ => Err(Error::new(EBADF)), + } } - fn close(&self, _file: usize) -> Result { - Ok(0) + fn kfstat(&self, id: usize, buf: UserSliceWo) -> Result<()> { + let handles_guard = HANDLES.read(); + let handle = handles_guard.get(&id).ok_or(Error::new(EBADF))?; + + buf.copy_exactly(&match *handle { + Handle::Irq { + irq: handle_irq, .. 
+ } => Stat { + st_mode: MODE_CHR | 0o600, + st_size: mem::size_of::() as u64, + st_blocks: 1, + st_blksize: mem::size_of::() as u32, + st_ino: handle_irq.into(), + st_nlink: 1, + ..Default::default() + }, + Handle::Bsp => Stat { + st_mode: MODE_CHR | 0o400, + st_size: mem::size_of::() as u64, + st_blocks: 1, + st_blksize: mem::size_of::() as u32, + st_ino: INO_BSP, + st_nlink: 1, + ..Default::default() + }, + Handle::Avail(cpu_id) => Stat { + st_mode: MODE_DIR | 0o700, + st_size: 0, + st_ino: INO_AVAIL | u64::from(cpu_id.get()) << 32, + st_nlink: 2, + ..Default::default() + }, + Handle::Phandle(phandle, ref buf) => Stat { + st_mode: MODE_DIR | 0o700, + st_size: buf.len() as u64, + st_ino: INO_PHANDLE | u64::from(phandle) << 32, + st_nlink: 2, + ..Default::default() + }, + Handle::TopLevel => Stat { + st_mode: MODE_DIR | 0o500, + st_size: 0, + st_ino: INO_TOPLEVEL, + st_nlink: 1, + ..Default::default() + }, + })?; + + Ok(()) + } + fn kfpath(&self, id: usize, buf: UserSliceWo) -> Result { + let handles_guard = HANDLES.read(); + let handle = handles_guard.get(&id).ok_or(Error::new(EBADF))?; + + let scheme_path = match handle { + Handle::Irq { irq, .. } => format!("irq:{}", irq), + Handle::Bsp => format!("irq:bsp"), + Handle::Avail(cpu_id) => format!("irq:cpu-{:2x}", cpu_id.get()), + Handle::Phandle(phandle, _) => format!("irq:phandle-{}", phandle), + Handle::TopLevel => format!("irq:"), + } + .into_bytes(); + + buf.copy_common_bytes_from_slice(&scheme_path) + } + fn kreadoff( + &self, + file: usize, + buffer: UserSliceWo, + _offset: u64, + _flags: u32, + _stored_flags: u32, + ) -> Result { + let handles_guard = HANDLES.read(); + let handle = handles_guard.get(&file).ok_or(Error::new(EBADF))?; + + match *handle { + // Ensures that the length of the buffer is larger than the size of a usize + Handle::Irq { + irq: handle_irq, + ack: ref handle_ack, + } => { + if buffer.len() < mem::size_of::() { + return Err(Error::new(EINVAL)); + } + let current = COUNTS.lock()[handle_irq as usize]; + if handle_ack.load(Ordering::SeqCst) != current { + buffer.write_usize(current)?; + Ok(mem::size_of::()) + } else { + Ok(0) + } + } + Handle::Bsp => { + if buffer.len() < mem::size_of::() { + return Err(Error::new(EINVAL)); + } + buffer.write_u32(LogicalCpuId::BSP.get())?; + Ok(mem::size_of::()) + } + Handle::Avail(_) | Handle::TopLevel | Handle::Phandle(_, _) => Err(Error::new(EISDIR)), + } } } diff --git a/src/scheme/itimer.rs b/src/scheme/itimer.rs deleted file mode 100644 index 3a91c3a2..00000000 --- a/src/scheme/itimer.rs +++ /dev/null @@ -1,110 +0,0 @@ -use alloc::collections::BTreeMap; -use core::{mem, slice, str}; -use core::sync::atomic::{AtomicUsize, Ordering}; -use spin::RwLock; - -use crate::syscall::data::ITimerSpec; -use crate::syscall::error::*; -use crate::syscall::flag::{CLOCK_REALTIME, CLOCK_MONOTONIC}; -use crate::syscall::scheme::Scheme; - -pub struct ITimerScheme { - next_id: AtomicUsize, - handles: RwLock> -} - -impl ITimerScheme { - pub fn new() -> ITimerScheme { - ITimerScheme { - next_id: AtomicUsize::new(0), - handles: RwLock::new(BTreeMap::new()) - } - } -} - -impl Scheme for ITimerScheme { - fn open(&self, path: &[u8], _flags: usize, _uid: u32, _gid: u32) -> Result { - let path_str = str::from_utf8(path).or(Err(Error::new(ENOENT)))?; - - let clock = path_str.parse::().or(Err(Error::new(ENOENT)))?; - - match clock { - CLOCK_REALTIME => (), - CLOCK_MONOTONIC => (), - _ => return Err(Error::new(ENOENT)) - } - - let id = self.next_id.fetch_add(1, Ordering::SeqCst); - 
self.handles.write().insert(id, clock); - - Ok(id) - } - - fn read(&self, id: usize, buf: &mut [u8]) -> Result { - let _clock = { - let handles = self.handles.read(); - *handles.get(&id).ok_or(Error::new(EBADF))? - }; - - let time_buf = unsafe { slice::from_raw_parts_mut(buf.as_mut_ptr() as *mut ITimerSpec, buf.len()/mem::size_of::()) }; - - let mut i = 0; - while i < time_buf.len() { - time_buf[i] = ITimerSpec::default(); - i += 1; - } - - Ok(i * mem::size_of::()) - } - - fn write(&self, id: usize, buf: &[u8]) -> Result { - let _clock = { - let handles = self.handles.read(); - *handles.get(&id).ok_or(Error::new(EBADF))? - }; - - let time_buf = unsafe { slice::from_raw_parts(buf.as_ptr() as *const ITimerSpec, buf.len()/mem::size_of::()) }; - - let mut i = 0; - while i < time_buf.len() { - let time = time_buf[i]; - println!("{}: {:?}", i, time); - i += 1; - } - - Ok(i * mem::size_of::()) - } - - fn fcntl(&self, _id: usize, _cmd: usize, _arg: usize) -> Result { - Ok(0) - } - - fn fevent(&self, id: usize, _flags: usize) -> Result { - let handles = self.handles.read(); - handles.get(&id).ok_or(Error::new(EBADF)).and(Ok(0)) - } - - fn fpath(&self, id: usize, buf: &mut [u8]) -> Result { - let clock = { - let handles = self.handles.read(); - *handles.get(&id).ok_or(Error::new(EBADF))? - }; - - let mut i = 0; - let scheme_path = format!("time:{}", clock).into_bytes(); - while i < buf.len() && i < scheme_path.len() { - buf[i] = scheme_path[i]; - i += 1; - } - Ok(i) - } - - fn fsync(&self, id: usize) -> Result { - let handles = self.handles.read(); - handles.get(&id).ok_or(Error::new(EBADF)).and(Ok(0)) - } - - fn close(&self, id: usize) -> Result { - self.handles.write().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) - } -} diff --git a/src/scheme/live.rs b/src/scheme/live.rs deleted file mode 100644 index fec8371c..00000000 --- a/src/scheme/live.rs +++ /dev/null @@ -1,164 +0,0 @@ -/// Disk scheme replacement when making live disk - -use alloc::sync::Arc; -use alloc::collections::BTreeMap; -use core::{cmp, slice}; -use core::sync::atomic::{AtomicUsize, Ordering}; -use spin::RwLock; - -use syscall::data::Stat; -use syscall::error::*; -use syscall::flag::{MODE_FILE, SEEK_SET, SEEK_CUR, SEEK_END}; -use syscall::scheme::Scheme; - -struct Handle { - path: &'static [u8], - data: Arc>, - mode: u16, - seek: usize -} - -pub struct DiskScheme { - next_id: AtomicUsize, - data: Arc>, - handles: RwLock> -} - -impl DiskScheme { - pub fn new() -> DiskScheme { - let data; - unsafe { - extern { - static mut __live_start: u8; - static mut __live_end: u8; - } - - let start = &mut __live_start as *mut u8; - let end = &mut __live_end as *mut u8; - - if end as usize >= start as usize { - data = slice::from_raw_parts_mut(start, end as usize - start as usize); - } else { - data = &mut []; - }; - } - - DiskScheme { - next_id: AtomicUsize::new(0), - data: Arc::new(RwLock::new(data)), - handles: RwLock::new(BTreeMap::new()) - } - } -} - -impl Scheme for DiskScheme { - fn open(&self, _path: &[u8], _flags: usize, _uid: u32, _gid: u32) -> Result { - let id = self.next_id.fetch_add(1, Ordering::SeqCst); - self.handles.write().insert(id, Handle { - path: b"0", - data: self.data.clone(), - mode: MODE_FILE | 0o744, - seek: 0 - }); - - Ok(id) - } - - fn read(&self, id: usize, buffer: &mut [u8]) -> Result { - let mut handles = self.handles.write(); - let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; - let data = handle.data.read(); - - let mut i = 0; - while i < buffer.len() && handle.seek < data.len() { - buffer[i] = 
data[handle.seek]; - i += 1; - handle.seek += 1; - } - - Ok(i) - } - - fn write(&self, id: usize, buffer: &[u8]) -> Result { - let mut handles = self.handles.write(); - let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; - let mut data = handle.data.write(); - - let mut i = 0; - while i < buffer.len() && handle.seek < data.len() { - data[handle.seek] = buffer[i]; - i += 1; - handle.seek += 1; - } - - Ok(i) - } - - fn seek(&self, id: usize, pos: usize, whence: usize) -> Result { - let mut handles = self.handles.write(); - let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; - let data = handle.data.read(); - - handle.seek = match whence { - SEEK_SET => cmp::min(data.len(), pos), - SEEK_CUR => cmp::max(0, cmp::min(data.len() as isize, handle.seek as isize + pos as isize)) as usize, - SEEK_END => cmp::max(0, cmp::min(data.len() as isize, data.len() as isize + pos as isize)) as usize, - _ => return Err(Error::new(EINVAL)) - }; - - Ok(handle.seek) - } - - fn fcntl(&self, id: usize, _cmd: usize, _arg: usize) -> Result { - let handles = self.handles.read(); - let _handle = handles.get(&id).ok_or(Error::new(EBADF))?; - - Ok(0) - } - - fn fpath(&self, id: usize, buf: &mut [u8]) -> Result { - let handles = self.handles.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - - //TODO: Copy scheme part in kernel - let mut i = 0; - let scheme_path = b"disk:"; - while i < buf.len() && i < scheme_path.len() { - buf[i] = scheme_path[i]; - i += 1; - } - - let mut j = 0; - while i < buf.len() && j < handle.path.len() { - buf[i] = handle.path[j]; - i += 1; - j += 1; - } - - Ok(i) - } - - fn fstat(&self, id: usize, stat: &mut Stat) -> Result { - let handles = self.handles.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - let data = handle.data.read(); - - stat.st_mode = handle.mode; - stat.st_uid = 0; - stat.st_gid = 0; - stat.st_size = data.len() as u64; - - Ok(0) - } - - fn fsync(&self, id: usize) -> Result { - let handles = self.handles.read(); - let _handle = handles.get(&id).ok_or(Error::new(EBADF))?; - - Ok(0) - } - - fn close(&self, id: usize) -> Result { - self.handles.write().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) - } -} diff --git a/src/scheme/memory.rs b/src/scheme/memory.rs index fd14ab31..f2374884 100644 --- a/src/scheme/memory.rs +++ b/src/scheme/memory.rs @@ -1,100 +1,302 @@ -use crate::context; -use crate::context::memory::Grant; -use crate::memory::{free_frames, used_frames}; -use crate::paging::VirtualAddress; +use core::num::NonZeroUsize; + +use alloc::{sync::Arc, vec::Vec}; +use rmm::PhysicalAddress; + +use crate::{ + context::{ + file::InternalFlags, + memory::{handle_notify_files, AddrSpace, AddrSpaceWrapper, Grant, PageSpan}, + }, + memory::{free_frames, used_frames, Frame, PAGE_SIZE}, + paging::VirtualAddress, + syscall::usercopy::UserSliceRw, +}; + use crate::paging::entry::EntryFlags; -use crate::syscall::data::{Map, StatVfs}; -use crate::syscall::error::*; -use crate::syscall::flag::{PROT_EXEC, PROT_READ, PROT_WRITE}; -use crate::syscall::scheme::Scheme; + +use crate::syscall::{ + data::{Map, StatVfs}, + error::*, + flag::MapFlags, + usercopy::UserSliceWo, +}; + +use super::{CallerCtx, KernelScheme, OpenResult}; pub struct MemoryScheme; -impl MemoryScheme { - pub fn new() -> Self { - MemoryScheme - } +// TODO: Use crate that autogenerates conversion functions. 
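+// The scheme-local file id doubles as the handle encoding:
+//   bits 0..8   HandleTy
+//   bits 8..16  MemoryType
+//   bits 16..32 HandleFlags
+// `from_raw` below is the inverse of the packing done in `kopen`.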
+#[repr(u8)] +#[derive(Clone, Copy, Debug, PartialEq)] +enum HandleTy { + Allocated = 0, + PhysBorrow = 1, + Translation = 2, +} +#[repr(u8)] +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum MemoryType { + Writeback = 0, + Uncacheable = 1, + WriteCombining = 2, + DeviceMemory = 3, } -impl Scheme for MemoryScheme { - fn open(&self, _path: &[u8], _flags: usize, _uid: u32, _gid: u32) -> Result { - Ok(0) + +bitflags! { + struct HandleFlags: u16 { + // TODO: below 32 bits? + const PHYS_CONTIGUOUS = 1; } +} - fn fstatvfs(&self, _file: usize, stat: &mut StatVfs) -> Result { - let used = used_frames() as u64; - let free = free_frames() as u64; +fn from_raw(raw: u32) -> Option<(HandleTy, MemoryType, HandleFlags)> { + Some(( + match raw & 0xFF { + 0 => HandleTy::Allocated, + 1 => HandleTy::PhysBorrow, + 2 => HandleTy::Translation, - stat.f_bsize = 4096; - stat.f_blocks = used + free; - stat.f_bfree = free; - stat.f_bavail = stat.f_bfree; + _ => return None, + }, + match (raw >> 8) & 0xFF { + 0 => MemoryType::Writeback, + 1 => MemoryType::Uncacheable, + 2 => MemoryType::WriteCombining, + 3 => MemoryType::DeviceMemory, - Ok(0) + _ => return None, + }, + HandleFlags::from_bits_truncate((raw >> 16) as u16), + )) +} + +impl MemoryScheme { + pub fn fmap_anonymous( + addr_space: &Arc, + map: &Map, + is_phys_contiguous: bool, + ) -> Result { + let span = PageSpan::validate_nonempty(VirtualAddress::new(map.address), map.size) + .ok_or(Error::new(EINVAL))?; + let page_count = NonZeroUsize::new(span.count).ok_or(Error::new(EINVAL))?; + + let mut notify_files = Vec::new(); + + if is_phys_contiguous && map.flags.contains(MapFlags::MAP_SHARED) { + // TODO: Should this be supported? + return Err(Error::new(EOPNOTSUPP)); + } + + let page = addr_space.acquire_write().mmap( + &addr_space, + (map.address != 0).then_some(span.base), + page_count, + map.flags, + &mut notify_files, + |dst_page, flags, mapper, flusher| { + let span = PageSpan::new(dst_page, page_count.get()); + if is_phys_contiguous { + Ok(Grant::zeroed_phys_contiguous(span, flags, mapper, flusher)?) + } else { + Ok(Grant::zeroed( + span, + flags, + mapper, + flusher, + map.flags.contains(MapFlags::MAP_SHARED), + )?) + } + }, + )?; + + handle_notify_files(notify_files); + + Ok(page.start_address().data()) } + pub fn physmap( + physical_address: usize, + size: usize, + flags: MapFlags, + memory_type: MemoryType, + ) -> Result { + // TODO: Check physical_address against the real MAXPHYADDR. 
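+        // Until then, 1 << 52 is used as a conservative upper bound (52-bit physical
+        // addresses); both the requested range and the PAGE_SIZE alignment of
+        // `physical_address` are validated below.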
+ let end = 1 << 52; + if (physical_address.saturating_add(size) as u64) > end || physical_address % PAGE_SIZE != 0 + { + return Err(Error::new(EINVAL)); + } - fn fmap(&self, _id: usize, map: &Map) -> Result { - //TODO: Abstract with other grant creation - if map.size == 0 { - Ok(0) - } else { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); + if size % PAGE_SIZE != 0 { + log::warn!( + "physmap size {} is not multiple of PAGE_SIZE {}", + size, + PAGE_SIZE + ); + return Err(Error::new(EINVAL)); + } + let page_count = NonZeroUsize::new(size.div_ceil(PAGE_SIZE)).ok_or(Error::new(EINVAL))?; - let mut grants = context.grants.lock(); + let current_addrsp = AddrSpace::current()?; - let full_size = ((map.size + 4095)/4096) * 4096; - let mut to_address = crate::USER_GRANT_OFFSET; + let base_page = current_addrsp.acquire_write().mmap_anywhere( + ¤t_addrsp, + page_count, + flags, + |dst_page, mut page_flags, dst_mapper, dst_flusher| { + match memory_type { + // Default + MemoryType::Writeback => (), - let mut entry_flags = EntryFlags::PRESENT | EntryFlags::USER_ACCESSIBLE; - if map.flags & PROT_EXEC == 0 { - entry_flags |= EntryFlags::NO_EXECUTE; - } - if map.flags & PROT_READ > 0 { - //TODO: PROT_READ - } - if map.flags & PROT_WRITE > 0 { - entry_flags |= EntryFlags::WRITABLE; - } + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] // TODO: AARCH64 + MemoryType::WriteCombining => { + page_flags = page_flags.custom_flag(EntryFlags::HUGE_PAGE.bits(), true) + } + + MemoryType::Uncacheable => { + page_flags = page_flags.custom_flag(EntryFlags::NO_CACHE.bits(), true) + } + + // MemoryType::DeviceMemory doesn't exist on x86 && x86_64, which instead support + // uncacheable, write-combining, write-through, write-protect, and write-back. 
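+                    // On targets where the requested type has no dedicated page-flag
+                    // encoding, it falls through to the `_ => ()` arm below and is mapped
+                    // as ordinary write-back memory.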
+ #[cfg(target_arch = "aarch64")] + MemoryType::DeviceMemory => { + page_flags = page_flags.custom_flag(EntryFlags::DEV_MEM.bits(), true) + } - let mut i = 0; - while i < grants.len() { - let start = grants[i].start_address().get(); - if to_address + full_size < start { - break; + _ => (), } - let pages = (grants[i].size() + 4095) / 4096; - let end = start + pages * 4096; - to_address = end; - i += 1; - } + Grant::physmap( + Frame::containing(PhysicalAddress::new(physical_address)), + PageSpan::new(dst_page, page_count.get()), + page_flags, + dst_mapper, + dst_flusher, + ) + }, + )?; + Ok(base_page.start_address().data()) + } +} +impl KernelScheme for MemoryScheme { + fn kopen(&self, path: &str, _flags: usize, ctx: CallerCtx) -> Result { + if path.len() > 64 { + return Err(Error::new(ENOENT)); + } + let path = path.trim_start_matches('/'); + + let (before_memty, memty_str) = path.split_once('@').unwrap_or((path, "")); + let (before_ty, type_str) = memty_str.split_once('?').unwrap_or((memty_str, "")); - grants.insert(i, Grant::map( - VirtualAddress::new(to_address), - full_size, - entry_flags - )); + let handle_ty = match before_memty { + "" | "zeroed" => HandleTy::Allocated, + "physical" => HandleTy::PhysBorrow, + "translation" => HandleTy::Translation, - Ok(to_address) + _ => return Err(Error::new(ENOENT)), + }; + let mem_ty = match before_ty { + "" | "wb" => MemoryType::Writeback, + "wc" => MemoryType::WriteCombining, + "uc" => MemoryType::Uncacheable, + "dev" => MemoryType::DeviceMemory, + + _ => return Err(Error::new(ENOENT)), + }; + + let flags = type_str + .split(',') + .filter_map(|ty_str| match ty_str { + //"32" => HandleFlags::BELOW_4G, + "phys_contiguous" => Some(Some(HandleFlags::PHYS_CONTIGUOUS)), + "" => None, + _ => Some(None), + }) + .collect::>() + .ok_or(Error::new(ENOENT))?; + + // TODO: Support arches with other default memory types? + if ctx.uid != 0 + && (!flags.is_empty() + || !matches!( + (handle_ty, mem_ty), + (HandleTy::Allocated, MemoryType::Writeback) + )) + { + return Err(Error::new(EACCES)); } + + Ok(OpenResult::SchemeLocal( + (handle_ty as usize) | ((mem_ty as usize) << 8) | (usize::from(flags.bits()) << 16), + InternalFlags::empty(), + )) } + fn kcall( + &self, + id: usize, + payload: UserSliceRw, + _flags: syscall::CallFlags, + _metadata: &[u64], + ) -> Result { + let (handle_ty, _, _) = u32::try_from(id) + .ok() + .and_then(from_raw) + .ok_or(Error::new(EBADF))?; + + match handle_ty { + HandleTy::Translation => { + let virt = VirtualAddress::new(payload.read_usize()?); + let (phys, _) = AddrSpace::current()? 
+ .acquire_read() + .table + .utable + .translate(virt) + .ok_or(Error::new(ENOENT))?; + payload.write_usize(phys.data())?; - fn fcntl(&self, _id: usize, _cmd: usize, _arg: usize) -> Result { - Ok(0) + // could just return address directly, but physaddrs might conflict with the bit + // patterns reserved for error codes + Ok(0) + } + HandleTy::Allocated | HandleTy::PhysBorrow => Err(Error::new(EOPNOTSUPP)), + } } - fn fpath(&self, _id: usize, buf: &mut [u8]) -> Result { - let mut i = 0; - let scheme_path = b"memory:"; - while i < buf.len() && i < scheme_path.len() { - buf[i] = scheme_path[i]; - i += 1; + fn kfmap( + &self, + id: usize, + addr_space: &Arc, + map: &Map, + _consume: bool, + ) -> Result { + let (handle_ty, mem_ty, flags) = u32::try_from(id) + .ok() + .and_then(from_raw) + .ok_or(Error::new(EBADF))?; + + match handle_ty { + HandleTy::Allocated => Self::fmap_anonymous( + addr_space, + map, + flags.contains(HandleFlags::PHYS_CONTIGUOUS), + ), + HandleTy::PhysBorrow => Self::physmap(map.offset, map.size, map.flags, mem_ty), + HandleTy::Translation => Err(Error::new(EOPNOTSUPP)), } - Ok(i) } + fn kfstatvfs(&self, _file: usize, dst: UserSliceWo) -> Result<()> { + let used = used_frames() as u64; + let free = free_frames() as u64; + + let stat = StatVfs { + f_bsize: PAGE_SIZE.try_into().map_err(|_| Error::new(EOVERFLOW))?, + f_blocks: used + free, + f_bfree: free, + f_bavail: free, + }; + dst.copy_exactly(&stat)?; - fn close(&self, _id: usize) -> Result { - Ok(0) + Ok(()) } } diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs index 8d2e345e..2fcc1da0 100644 --- a/src/scheme/mod.rs +++ b/src/scheme/mod.rs @@ -6,27 +6,42 @@ //! The kernel validates paths and file descriptors before they are passed to schemes, //! also stripping the scheme identifier of paths if necessary. -use alloc::sync::Arc; -use alloc::boxed::Box; -use alloc::collections::BTreeMap; -use alloc::vec::Vec; -use core::sync::atomic::AtomicUsize; -use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; +// TODO: Move handling of the global namespace to userspace. -use crate::syscall::error::*; -use crate::syscall::scheme::Scheme; - -use self::debug::DebugScheme; -use self::event::EventScheme; -use self::initfs::InitFsScheme; -use self::irq::IrqScheme; -use self::itimer::ITimerScheme; -use self::memory::MemoryScheme; -use self::pipe::PipeScheme; -use self::proc::ProcScheme; -use self::root::RootScheme; -use self::sys::SysScheme; -use self::time::TimeScheme; +use alloc::{boxed::Box, collections::BTreeMap, string::ToString, sync::Arc, vec::Vec}; +use core::{hash::BuildHasherDefault, sync::atomic::AtomicUsize}; +use hashbrown::{hash_map::DefaultHashBuilder, HashMap}; +use indexmap::IndexMap; +use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; +use syscall::{CallFlags, EventFlags, MunmapFlags, SendFdFlags}; + +use crate::{ + context::{ + file::{FileDescription, InternalFlags}, + memory::AddrSpaceWrapper, + }, + syscall::{ + error::*, + usercopy::{UserSliceRo, UserSliceRw, UserSliceWo}, + }, +}; + +#[cfg(feature = "acpi")] +use self::acpi::AcpiScheme; +#[cfg(dtb)] +use self::dtb::DtbScheme; + +use self::{ + debug::DebugScheme, event::EventScheme, irq::IrqScheme, memory::MemoryScheme, pipe::PipeScheme, + proc::ProcScheme, root::RootScheme, serio::SerioScheme, sys::SysScheme, time::TimeScheme, + user::UserScheme, +}; + +/// When compiled with the "acpi" feature - `acpi:` - allows drivers to read a limited set of ACPI tables. 
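+/// It currently exposes two entries: `rxsdt`, the raw bytes of the RSDT/XSDT, and `kstop`,
+/// a pipe that becomes readable when the kernel asks userspace to perform an ACPI shutdown
+/// (see `register_kstop`).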
+#[cfg(feature = "acpi")] +pub mod acpi; +#[cfg(dtb)] +pub mod dtb; /// `debug:` - provides access to serial console pub mod debug; @@ -34,19 +49,9 @@ pub mod debug; /// `event:` - allows reading of `Event`s which are registered using `fevent` pub mod event; -/// `initfs:` - a readonly filesystem used for initializing the system -pub mod initfs; - /// `irq:` - allows userspace handling of IRQs pub mod irq; -/// `itimer:` - support for getitimer and setitimer -pub mod itimer; - -/// When compiled with "live" feature - `disk:` - embedded filesystem for live disk -#[cfg(feature="live")] -pub mod live; - /// `memory:` - a scheme for accessing physical memory pub mod memory; @@ -59,6 +64,9 @@ pub mod proc; /// `:` - allows the creation of userspace schemes, tightly dependent on `user` pub mod root; +/// `serio:` - provides access to ps/2 devices +pub mod serio; + /// `sys:` - system information, such as the context list and scheme list pub mod sys; @@ -75,19 +83,17 @@ pub const SCHEME_MAX_SCHEMES: usize = 65_536; int_like!(SchemeNamespace, AtomicSchemeNamespace, usize, AtomicUsize); // Unique identifier for a scheme. -int_like!(SchemeId, AtomicSchemeId, usize, AtomicUsize); - -pub const ATOMIC_SCHEMEID_INIT: AtomicSchemeId = AtomicSchemeId::default(); +int_like!(SchemeId, usize); // Unique identifier for a file descriptor. int_like!(FileHandle, AtomicFileHandle, usize, AtomicUsize); pub struct SchemeIter<'a> { - inner: Option<::alloc::collections::btree_map::Iter<'a, Box<[u8]>, SchemeId>> + inner: Option, SchemeId>>, } impl<'a> Iterator for SchemeIter<'a> { - type Item = (&'a Box<[u8]>, &'a SchemeId); + type Item = (&'a Box, &'a SchemeId); fn next(&mut self) -> Option { self.inner.as_mut().and_then(|iter| iter.next()) @@ -96,38 +102,77 @@ impl<'a> Iterator for SchemeIter<'a> { /// Scheme list type pub struct SchemeList { - map: BTreeMap>>, - names: BTreeMap, SchemeId>>, + map: HashMap, + pub(crate) names: HashMap, SchemeId, DefaultHashBuilder>>, next_ns: usize, - next_id: usize + next_id: usize, } - impl SchemeList { /// Create a new scheme list. pub fn new() -> Self { let mut list = SchemeList { - map: BTreeMap::new(), - names: BTreeMap::new(), + map: HashMap::new(), + names: HashMap::new(), // Scheme namespaces always start at 1. 0 is a reserved namespace, the null namespace next_ns: 1, - next_id: 1 + next_id: MAX_GLOBAL_SCHEMES, + }; + + let mut insert_globals = |globals: &[GlobalSchemes]| { + for &g in globals { + list.map + .insert(SchemeId::from(g as usize), KernelSchemes::Global(g)); + } }; + + // TODO: impl TryFrom and bypass map for global schemes? + { + use GlobalSchemes::*; + insert_globals(&[Debug, Event, Memory, Pipe, Serio, Irq, Time, Sys, Proc]); + + #[cfg(feature = "acpi")] + insert_globals(&[Acpi]); + + #[cfg(dtb)] + insert_globals(&[Dtb]); + } + + list.new_null(); list.new_root(); list } + /// Initialize the null namespace + fn new_null(&mut self) { + let ns = SchemeNamespace(0); + self.names + .insert(ns, IndexMap::with_hasher(BuildHasherDefault::default())); + + //TODO: Only memory: is in the null namespace right now. 
It should be removed when + //anonymous mmap's are implemented + self.insert_global(ns, "memory", GlobalSchemes::Memory) + .unwrap(); + self.insert_global(ns, "pipe", GlobalSchemes::Pipe).unwrap(); + } + /// Initialize a new namespace fn new_ns(&mut self) -> SchemeNamespace { let ns = SchemeNamespace(self.next_ns); self.next_ns += 1; - self.names.insert(ns, BTreeMap::new()); - - self.insert(ns, Box::new(*b""), |scheme_id| Arc::new(Box::new(RootScheme::new(ns, scheme_id)))).unwrap(); - self.insert(ns, Box::new(*b"event"), |_| Arc::new(Box::new(EventScheme))).unwrap(); - self.insert(ns, Box::new(*b"itimer"), |_| Arc::new(Box::new(ITimerScheme::new()))).unwrap(); - self.insert(ns, Box::new(*b"memory"), |_| Arc::new(Box::new(MemoryScheme::new()))).unwrap(); - self.insert(ns, Box::new(*b"sys"), |_| Arc::new(Box::new(SysScheme::new()))).unwrap(); - self.insert(ns, Box::new(*b"time"), |scheme_id| Arc::new(Box::new(TimeScheme::new(scheme_id)))).unwrap(); + self.names + .insert(ns, IndexMap::with_hasher(BuildHasherDefault::default())); + + self.insert(ns, "", |scheme_id| { + KernelSchemes::Root(Arc::new(RootScheme::new(ns, scheme_id))) + }) + .unwrap(); + self.insert_global(ns, "event", GlobalSchemes::Event) + .unwrap(); + self.insert_global(ns, "memory", GlobalSchemes::Memory) + .unwrap(); + self.insert_global(ns, "pipe", GlobalSchemes::Pipe).unwrap(); + self.insert_global(ns, "sys", GlobalSchemes::Sys).unwrap(); + self.insert_global(ns, "time", GlobalSchemes::Time).unwrap(); ns } @@ -138,33 +183,46 @@ impl SchemeList { let ns = self.new_ns(); // These schemes should only be available on the root - self.insert(ns, Box::new(*b"debug"), |scheme_id| Arc::new(Box::new(DebugScheme::new(scheme_id)))).unwrap(); - self.insert(ns, Box::new(*b"initfs"), |_| Arc::new(Box::new(InitFsScheme::new()))).unwrap(); - self.insert(ns, Box::new(*b"irq"), |scheme_id| Arc::new(Box::new(IrqScheme::new(scheme_id)))).unwrap(); - self.insert(ns, Box::new(*b"proc"), |scheme_id| Arc::new(Box::new(ProcScheme::new(scheme_id)))).unwrap(); - - #[cfg(feature = "live")] { - self.insert(ns, Box::new(*b"disk/live"), |_| Arc::new(Box::new(self::live::DiskScheme::new()))).unwrap(); + #[cfg(dtb)] + { + self.insert_global(ns, "kernel.dtb", GlobalSchemes::Dtb) + .unwrap(); } - - // Pipe is special and needs to be in the root namespace - self.insert(ns, Box::new(*b"pipe"), |scheme_id| Arc::new(Box::new(PipeScheme::new(scheme_id)))).unwrap(); + #[cfg(feature = "acpi")] + { + self.insert_global(ns, "kernel.acpi", GlobalSchemes::Acpi) + .unwrap(); + } + self.insert_global(ns, "debug", GlobalSchemes::Debug) + .unwrap(); + self.insert_global(ns, "irq", GlobalSchemes::Irq).unwrap(); + self.insert_global(ns, "kernel.proc", GlobalSchemes::Proc) + .unwrap(); + self.insert_global(ns, "serio", GlobalSchemes::Serio) + .unwrap(); } - pub fn make_ns(&mut self, from: SchemeNamespace, names: &[&[u8]]) -> Result { + pub fn make_ns( + &mut self, + from: SchemeNamespace, + names: impl IntoIterator>, + ) -> Result { // Create an empty namespace let to = self.new_ns(); // Copy requested scheme IDs - for name in names.iter() { - let id = if let Some((id, _scheme)) = self.get_name(from, name) { - id - } else { + for name in names { + let Some((id, _scheme)) = self.get_name(from, &name) else { return Err(Error::new(ENODEV)); }; if let Some(ref mut names) = self.names.get_mut(&to) { - assert!(names.insert(name.to_vec().into_boxed_slice(), id).is_none()); + if names + .insert(name.to_string().into_boxed_str(), id) + .is_some() + { + return Err(Error::new(EEXIST)); 
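// Note: the previous BTreeMap-based implementation asserted that the copied
// name was absent; with the IndexMap rewrite a duplicate name in the requested
// list is reported to the caller as EEXIST instead of panicking.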
+ } } else { panic!("scheme namespace not found"); } @@ -173,22 +231,18 @@ impl SchemeList { Ok(to) } - pub fn iter(&self) -> ::alloc::collections::btree_map::Iter>> { - self.map.iter() - } - pub fn iter_name(&self, ns: SchemeNamespace) -> SchemeIter { SchemeIter { - inner: self.names.get(&ns).map(|names| names.iter()) + inner: self.names.get(&ns).map(|names| names.iter()), } } /// Get the nth scheme. - pub fn get(&self, id: SchemeId) -> Option<&Arc>> { + pub fn get(&self, id: SchemeId) -> Option<&KernelSchemes> { self.map.get(&id) } - pub fn get_name(&self, ns: SchemeNamespace, name: &[u8]) -> Option<(SchemeId, &Arc>)> { + pub fn get_name(&self, ns: SchemeNamespace, name: &str) -> Option<(SchemeId, &KernelSchemes)> { if let Some(names) = self.names.get(&ns) { if let Some(&id) = names.get(name) { return self.get(id).map(|scheme| (id, scheme)); @@ -197,12 +251,44 @@ impl SchemeList { None } + pub fn insert_global( + &mut self, + ns: SchemeNamespace, + name: &str, + global: GlobalSchemes, + ) -> Result<()> { + let prev = self + .names + .get_mut(&ns) + .ok_or(Error::new(ENODEV))? + .insert(name.into(), global.scheme_id()); + + if prev.is_some() { + return Err(Error::new(EEXIST)); + } + + Ok(()) + } + /// Create a new scheme. - pub fn insert(&mut self, ns: SchemeNamespace, name: Box<[u8]>, scheme_fn: F) -> Result - where F: Fn(SchemeId) -> Arc> - { + pub fn insert( + &mut self, + ns: SchemeNamespace, + name: &str, + scheme_fn: impl FnOnce(SchemeId) -> KernelSchemes, + ) -> Result { + self.insert_and_pass(ns, name, |id| (scheme_fn(id), ())) + .map(|(id, ())| id) + } + + pub fn insert_and_pass( + &mut self, + ns: SchemeNamespace, + name: &str, + scheme_fn: impl FnOnce(SchemeId) -> (KernelSchemes, T), + ) -> Result<(SchemeId, T)> { if let Some(names) = self.names.get(&ns) { - if names.contains_key(&name) { + if names.contains_key(name) { return Err(Error::new(EEXIST)); } } @@ -224,16 +310,18 @@ impl SchemeList { let id = SchemeId(self.next_id); self.next_id += 1; - let scheme = scheme_fn(id); + let (new_scheme, t) = scheme_fn(id); - assert!(self.map.insert(id, scheme).is_none()); + assert!(self.map.insert(id, new_scheme).is_none()); if let Some(ref mut names) = self.names.get_mut(&ns) { - assert!(names.insert(name, id).is_none()); + assert!(names + .insert(name.to_string().into_boxed_str(), id) + .is_none()); } else { // Nonexistent namespace, posssibly null namespace return Err(Error::new(ENODEV)); } - Ok(id) + Ok((id, t)) } /// Remove a scheme @@ -247,7 +335,7 @@ impl SchemeList { } } for name in remove { - assert!(names.remove(&name).is_some()); + assert!(names.swap_remove(&name).is_some()); } } } @@ -270,3 +358,236 @@ pub fn schemes() -> RwLockReadGuard<'static, SchemeList> { pub fn schemes_mut() -> RwLockWriteGuard<'static, SchemeList> { SCHEMES.call_once(init_schemes).write() } + +#[allow(unused_variables)] +pub trait KernelScheme: Send + Sync + 'static { + fn kopen(&self, path: &str, flags: usize, _ctx: CallerCtx) -> Result { + Err(Error::new(ENOENT)) + } + + fn kfmap( + &self, + number: usize, + addr_space: &Arc, + map: &crate::syscall::data::Map, + consume: bool, + ) -> Result { + Err(Error::new(EOPNOTSUPP)) + } + fn kfunmap(&self, number: usize, offset: usize, size: usize, flags: MunmapFlags) -> Result<()> { + Err(Error::new(EOPNOTSUPP)) + } + + fn kdup(&self, old_id: usize, buf: UserSliceRo, _caller: CallerCtx) -> Result { + Err(Error::new(EOPNOTSUPP)) + } + fn kwriteoff( + &self, + id: usize, + buf: UserSliceRo, + offset: u64, + flags: u32, + stored_flags: u32, + ) -> Result { + if 
offset != u64::MAX { + return Err(Error::new(ESPIPE)); + } + self.kwrite(id, buf, flags, stored_flags) + } + fn kreadoff( + &self, + id: usize, + buf: UserSliceWo, + offset: u64, + flags: u32, + stored_flags: u32, + ) -> Result { + if offset != u64::MAX { + return Err(Error::new(ESPIPE)); + } + self.kread(id, buf, flags, stored_flags) + } + fn kwrite(&self, id: usize, buf: UserSliceRo, flags: u32, stored_flags: u32) -> Result { + Err(Error::new(EBADF)) + } + fn kread(&self, id: usize, buf: UserSliceWo, flags: u32, stored_flags: u32) -> Result { + Err(Error::new(EBADF)) + } + fn kfpath(&self, id: usize, buf: UserSliceWo) -> Result { + Err(Error::new(EBADF)) + } + fn kfutimens(&self, id: usize, buf: UserSliceRo) -> Result { + Err(Error::new(EBADF)) + } + fn kfstat(&self, id: usize, buf: UserSliceWo) -> Result<()> { + Err(Error::new(EBADF)) + } + fn kfstatvfs(&self, id: usize, buf: UserSliceWo) -> Result<()> { + Err(Error::new(EBADF)) + } + + fn ksendfd( + &self, + id: usize, + desc: Arc>, + flags: SendFdFlags, + arg: u64, + ) -> Result { + Err(Error::new(EOPNOTSUPP)) + } + fn getdents( + &self, + id: usize, + buf: UserSliceWo, + header_size: u16, + opaque_id_first: u64, + ) -> Result { + Err(Error::new(EOPNOTSUPP)) + } + + fn fsync(&self, id: usize) -> Result<()> { + Ok(()) + } + fn ftruncate(&self, id: usize, len: usize) -> Result<()> { + Err(Error::new(EBADF)) + } + fn fsize(&self, id: usize) -> Result { + Err(Error::new(ESPIPE)) + } + fn legacy_seek(&self, id: usize, pos: isize, whence: usize) -> Option> { + None + } + fn fchmod(&self, id: usize, new_mode: u16) -> Result<()> { + Err(Error::new(EBADF)) + } + fn fchown(&self, id: usize, new_uid: u32, new_gid: u32) -> Result<()> { + Err(Error::new(EBADF)) + } + fn fevent(&self, id: usize, flags: EventFlags) -> Result { + Ok(EventFlags::empty()) + } + fn flink(&self, id: usize, new_path: &str, caller_ctx: CallerCtx) -> Result<()> { + Err(Error::new(EBADF)) + } + fn frename(&self, id: usize, new_path: &str, caller_ctx: CallerCtx) -> Result<()> { + Err(Error::new(EBADF)) + } + fn fcntl(&self, id: usize, cmd: usize, arg: usize) -> Result { + Ok(0) + } + fn rmdir(&self, path: &str, ctx: CallerCtx) -> Result<()> { + Err(Error::new(ENOENT)) + } + fn unlink(&self, path: &str, ctx: CallerCtx) -> Result<()> { + Err(Error::new(ENOENT)) + } + fn close(&self, id: usize) -> Result<()> { + Ok(()) + } + fn on_close(&self, id: usize) -> Result<()> { + self.close(id) + } + fn kcall( + &self, + id: usize, + payload: UserSliceRw, + flags: CallFlags, + metadata: &[u64], + ) -> Result { + Err(Error::new(EOPNOTSUPP)) + } +} + +#[derive(Debug)] +pub enum OpenResult { + SchemeLocal(usize, InternalFlags), + External(Arc>), +} +pub struct CallerCtx { + pub pid: usize, + pub uid: u32, + pub gid: u32, +} + +#[derive(Clone)] +pub enum KernelSchemes { + Root(Arc), + User(UserScheme), + Global(GlobalSchemes), +} +#[repr(u8)] +#[derive(Clone, Copy)] +pub enum GlobalSchemes { + Debug = 1, + Event, + Memory, + Pipe, + Serio, + Irq, + Time, + Sys, + Proc, + + #[cfg(feature = "acpi")] + Acpi, + + #[cfg(dtb)] + Dtb, +} +pub const MAX_GLOBAL_SCHEMES: usize = 16; + +const _: () = { + assert!(1 + core::mem::variant_count::() < MAX_GLOBAL_SCHEMES); +}; + +impl core::ops::Deref for KernelSchemes { + type Target = dyn KernelScheme; + + fn deref(&self) -> &Self::Target { + match self { + Self::Root(scheme) => &**scheme, + Self::User(scheme) => scheme, + + Self::Global(global) => &**global, + } + } +} +impl core::ops::Deref for GlobalSchemes { + type Target = dyn KernelScheme; + + fn 
deref(&self) -> &Self::Target { + match self { + Self::Debug => &DebugScheme, + Self::Event => &EventScheme, + Self::Memory => &MemoryScheme, + Self::Pipe => &PipeScheme, + Self::Serio => &SerioScheme, + Self::Irq => &IrqScheme, + Self::Time => &TimeScheme, + Self::Sys => &SysScheme, + Self::Proc => &ProcScheme, + #[cfg(feature = "acpi")] + Self::Acpi => &AcpiScheme, + #[cfg(dtb)] + Self::Dtb => &DtbScheme, + } + } +} +impl GlobalSchemes { + pub fn scheme_id(self) -> SchemeId { + SchemeId::new(self as usize) + } +} + +#[cold] +pub fn init_globals() { + #[cfg(feature = "acpi")] + { + AcpiScheme::init(); + } + #[cfg(dtb)] + { + DtbScheme::init(); + } + IrqScheme::init(); +} diff --git a/src/scheme/pipe.rs b/src/scheme/pipe.rs index 7c02c077..278e004c 100644 --- a/src/scheme/pipe.rs +++ b/src/scheme/pipe.rs @@ -1,277 +1,281 @@ -use alloc::sync::{Arc, Weak}; -use alloc::collections::{BTreeMap, VecDeque}; -use core::sync::atomic::{AtomicUsize, Ordering}; -use spin::{Mutex, Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; - -use crate::event; -use crate::scheme::{AtomicSchemeId, ATOMIC_SCHEMEID_INIT, SchemeId}; -use crate::sync::WaitCondition; -use crate::syscall::error::{Error, Result, EAGAIN, EBADF, EINTR, EINVAL, EPIPE, ESPIPE}; -use crate::syscall::flag::{EVENT_READ, EVENT_WRITE, F_GETFL, F_SETFL, O_ACCMODE, O_NONBLOCK, MODE_FIFO}; -use crate::syscall::scheme::Scheme; -use crate::syscall::data::Stat; - -/// Pipes list -pub static PIPE_SCHEME_ID: AtomicSchemeId = ATOMIC_SCHEMEID_INIT; -static PIPE_NEXT_ID: AtomicUsize = AtomicUsize::new(0); -static PIPES: Once>, BTreeMap>)>> = Once::new(); - -/// Initialize pipes, called if needed -fn init_pipes() -> RwLock<(BTreeMap>, BTreeMap>)> { - RwLock::new((BTreeMap::new(), BTreeMap::new())) -} +use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; -/// Get the global pipes list, const -fn pipes() -> RwLockReadGuard<'static, (BTreeMap>, BTreeMap>)> { - PIPES.call_once(init_pipes).read() -} +use alloc::{ + collections::{BTreeMap, VecDeque}, + sync::Arc, +}; + +use spin::{Mutex, RwLock}; + +use crate::{ + context::file::InternalFlags, + event, + sync::WaitCondition, + syscall::{ + data::Stat, + error::{Error, Result, EAGAIN, EBADF, EINTR, EINVAL, ENOENT, EPIPE}, + flag::{EventFlags, EVENT_READ, EVENT_WRITE, MODE_FIFO, O_NONBLOCK}, + usercopy::{UserSliceRo, UserSliceWo}, + }, +}; + +use super::{CallerCtx, GlobalSchemes, KernelScheme, OpenResult}; + +// TODO: Preallocate a number of scheme IDs, since there can only be *one* root namespace, and +// therefore only *one* pipe scheme. +static PIPE_NEXT_ID: AtomicUsize = AtomicUsize::new(1); + +// TODO: SLOB? +// Using BTreeMap as hashbrown doesn't have a const constructor. +static PIPES: RwLock>> = RwLock::new(BTreeMap::new()); + +const MAX_QUEUE_SIZE: usize = 65536; -/// Get the global pipes list, mutable -fn pipes_mut() -> RwLockWriteGuard<'static, (BTreeMap>, BTreeMap>)> { - PIPES.call_once(init_pipes).write() +// In almost all places where Rust (and LLVM) uses pointers, they are limited to nonnegative isize, +// so this is fine. 
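// A minimal sketch (illustrative only, not part of this change) of the id
// encoding defined just below: the writer end reuses the reader key with the
// top bit set, and `from_raw_id` splits an id back into (role, key). The
// function name here is only for illustration.
#[allow(dead_code)]
fn pipe_id_roundtrip(read_key: usize) {
    let write_id = read_key | WRITE_NOT_READ_BIT; // id handed out for the write end

    let (is_write, key) = from_raw_id(write_id);
    debug_assert!(is_write && key == read_key); // both ends share one PIPES entry

    let (is_write, key) = from_raw_id(read_key);
    debug_assert!(!is_write && key == read_key);
}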
+const WRITE_NOT_READ_BIT: usize = 1 << (usize::BITS - 1); + +fn from_raw_id(id: usize) -> (bool, usize) { + (id & WRITE_NOT_READ_BIT != 0, id & !WRITE_NOT_READ_BIT) } -pub fn pipe(flags: usize) -> (usize, usize) { - let mut pipes = pipes_mut(); - let scheme_id = PIPE_SCHEME_ID.load(Ordering::SeqCst); - let read_id = PIPE_NEXT_ID.fetch_add(1, Ordering::SeqCst); - let write_id = PIPE_NEXT_ID.fetch_add(1, Ordering::SeqCst); - let read = PipeRead::new(scheme_id, write_id, flags); - let write = PipeWrite::new(&read, read_id, flags); - pipes.0.insert(read_id, Arc::new(read)); - pipes.1.insert(write_id, Arc::new(write)); - (read_id, write_id) +pub fn pipe() -> Result<(usize, usize)> { + let id = PIPE_NEXT_ID.fetch_add(1, Ordering::Relaxed); + + PIPES.write().insert( + id, + Arc::new(Pipe { + queue: Mutex::new(VecDeque::new()), + read_condition: WaitCondition::new(), + write_condition: WaitCondition::new(), + writer_is_alive: AtomicBool::new(true), + reader_is_alive: AtomicBool::new(true), + has_run_dup: AtomicBool::new(false), + }), + ); + + Ok((id, id | WRITE_NOT_READ_BIT)) } pub struct PipeScheme; -impl PipeScheme { - pub fn new(scheme_id: SchemeId) -> PipeScheme { - PIPE_SCHEME_ID.store(scheme_id, Ordering::SeqCst); - PipeScheme - } -} - -impl Scheme for PipeScheme { - fn read(&self, id: usize, buf: &mut [u8]) -> Result { - // Clone to prevent deadlocks - let pipe = { - let pipes = pipes(); - pipes.0.get(&id).map(|pipe| pipe.clone()).ok_or(Error::new(EBADF))? - }; +impl KernelScheme for PipeScheme { + fn fevent(&self, id: usize, flags: EventFlags) -> Result { + let (is_writer_not_reader, key) = from_raw_id(id); + let pipe = Arc::clone(PIPES.read().get(&key).ok_or(Error::new(EBADF))?); - pipe.read(buf) - } + let mut ready = EventFlags::empty(); - fn write(&self, id: usize, buf: &[u8]) -> Result { - // Clone to prevent deadlocks - let pipe = { - let pipes = pipes(); - pipes.1.get(&id).map(|pipe| pipe.clone()).ok_or(Error::new(EBADF))? 
- }; + if is_writer_not_reader + && flags.contains(EVENT_WRITE) + && (pipe.queue.lock().len() <= MAX_QUEUE_SIZE + || !pipe.reader_is_alive.load(Ordering::Acquire)) + { + ready |= EventFlags::EVENT_WRITE; + } + if !is_writer_not_reader + && flags.contains(EVENT_READ) + && (!pipe.queue.lock().is_empty() || !pipe.writer_is_alive.load(Ordering::Acquire)) + { + ready |= EventFlags::EVENT_READ; + } - pipe.write(buf) + Ok(ready) } - fn fcntl(&self, id: usize, cmd: usize, arg: usize) -> Result { - let pipes = pipes(); + fn close(&self, id: usize) -> Result<()> { + let (is_write_not_read, key) = from_raw_id(id); - if let Some(pipe) = pipes.0.get(&id) { - return pipe.fcntl(cmd, arg); - } + let pipe = Arc::clone(PIPES.read().get(&key).ok_or(Error::new(EBADF))?); + let scheme_id = GlobalSchemes::Pipe.scheme_id(); - if let Some(pipe) = pipes.1.get(&id) { - return pipe.fcntl(cmd, arg); - } + let can_remove = if is_write_not_read { + event::trigger(scheme_id, key, EVENT_READ); - Err(Error::new(EBADF)) - } + pipe.read_condition.notify(); + pipe.writer_is_alive.store(false, Ordering::SeqCst); - fn fevent(&self, id: usize, flags: usize) -> Result { - let pipes = pipes(); - - if let Some(pipe) = pipes.0.get(&id) { - if flags == EVENT_READ { - // TODO: Return correct flags - if pipe.vec.lock().is_empty() { - return Ok(0); - } else { - return Ok(EVENT_READ); - } - } - } + !pipe.reader_is_alive.load(Ordering::SeqCst) + } else { + event::trigger(scheme_id, key | WRITE_NOT_READ_BIT, EVENT_WRITE); - if let Some(_pipe) = pipes.1.get(&id) { - if flags == EVENT_WRITE { - return Ok(EVENT_WRITE); - } - } + pipe.write_condition.notify(); + pipe.reader_is_alive.store(false, Ordering::SeqCst); - Err(Error::new(EBADF)) - } + !pipe.writer_is_alive.load(Ordering::SeqCst) + }; - fn fpath(&self, _id: usize, buf: &mut [u8]) -> Result { - let mut i = 0; - let scheme_path = b"pipe:"; - while i < buf.len() && i < scheme_path.len() { - buf[i] = scheme_path[i]; - i += 1; + if can_remove { + let _ = PIPES.write().remove(&key); } - Ok(i) + + Ok(()) } - fn fstat(&self, _id: usize, stat: &mut Stat) -> Result { - *stat = Stat { - st_mode: MODE_FIFO | 0o666, - ..Default::default() - }; + fn kdup(&self, old_id: usize, user_buf: UserSliceRo, _ctx: CallerCtx) -> Result { + let (is_writer_not_reader, key) = from_raw_id(old_id); - Ok(0) - } + if is_writer_not_reader { + return Err(Error::new(EBADF)); + } - fn fsync(&self, _id: usize) -> Result { - Ok(0) - } + let mut buf = [0_u8; 5]; - fn close(&self, id: usize) -> Result { - let mut pipes = pipes_mut(); + if user_buf.copy_common_bytes_to_slice(&mut buf)? 
< 5 || buf != *b"write" { + return Err(Error::new(EINVAL)); + } - drop(pipes.0.remove(&id)); - drop(pipes.1.remove(&id)); + let pipe = Arc::clone(PIPES.read().get(&key).ok_or(Error::new(EBADF))?); - Ok(0) - } + if pipe.has_run_dup.swap(true, Ordering::SeqCst) { + return Err(Error::new(EBADF)); + } - fn seek(&self, _id: usize, _pos: usize, _whence: usize) -> Result { - Err(Error::new(ESPIPE)) + Ok(OpenResult::SchemeLocal( + key | WRITE_NOT_READ_BIT, + InternalFlags::empty(), + )) } -} + fn kopen(&self, path: &str, _flags: usize, _ctx: CallerCtx) -> Result { + if !path.trim_start_matches('/').is_empty() { + return Err(Error::new(ENOENT)); + } -/// Read side of a pipe -pub struct PipeRead { - scheme_id: SchemeId, - write_id: usize, - flags: AtomicUsize, - condition: Arc, - vec: Arc>> -} + let (read_id, _) = pipe()?; -impl PipeRead { - pub fn new(scheme_id: SchemeId, write_id: usize, flags: usize) -> Self { - PipeRead { - scheme_id, - write_id, - flags: AtomicUsize::new(flags), - condition: Arc::new(WaitCondition::new()), - vec: Arc::new(Mutex::new(VecDeque::new())), - } + Ok(OpenResult::SchemeLocal(read_id, InternalFlags::empty())) } - fn fcntl(&self, cmd: usize, arg: usize) -> Result { - match cmd { - F_GETFL => Ok(self.flags.load(Ordering::SeqCst)), - F_SETFL => { - self.flags.store(arg & ! O_ACCMODE, Ordering::SeqCst); - Ok(0) - }, - _ => Err(Error::new(EINVAL)) + fn kread( + &self, + id: usize, + user_buf: UserSliceWo, + fcntl_flags: u32, + _stored_flags: u32, + ) -> Result { + let (is_write_not_read, key) = from_raw_id(id); + + if is_write_not_read { + return Err(Error::new(EBADF)); } - } + let pipe = Arc::clone(PIPES.read().get(&key).ok_or(Error::new(EBADF))?); - fn read(&self, buf: &mut [u8]) -> Result { loop { - { - let mut vec = self.vec.lock(); - - let mut i = 0; - while i < buf.len() { - if let Some(b) = vec.pop_front() { - buf[i] = b; - i += 1; - } else { - break; - } - } - - if i > 0 { - event::trigger(self.scheme_id, self.write_id, EVENT_WRITE); - - return Ok(i); - } + let mut vec = pipe.queue.lock(); + + let (s1, s2) = vec.as_slices(); + let s1_count = core::cmp::min(user_buf.len(), s1.len()); + + let (s1_dst, s2_buf) = user_buf + .split_at(s1_count) + .expect("s1_count <= user_buf.len()"); + s1_dst.copy_from_slice(&s1[..s1_count])?; + + let s2_count = core::cmp::min(s2_buf.len(), s2.len()); + s2_buf + .limit(s2_count) + .expect("s2_count <= s2_buf.len()") + .copy_from_slice(&s2[..s2_count])?; + + let bytes_read = s1_count + s2_count; + let _ = vec.drain(..bytes_read); + + if bytes_read > 0 { + event::trigger( + GlobalSchemes::Pipe.scheme_id(), + key | WRITE_NOT_READ_BIT, + EVENT_WRITE, + ); + pipe.write_condition.notify(); + + return Ok(bytes_read); + } else if user_buf.is_empty() { + return Ok(0); } - if Arc::weak_count(&self.vec) == 0 { + if !pipe.writer_is_alive.load(Ordering::SeqCst) { return Ok(0); - } else if self.flags.load(Ordering::SeqCst) & O_NONBLOCK == O_NONBLOCK { + } else if fcntl_flags & O_NONBLOCK as u32 != 0 { return Err(Error::new(EAGAIN)); - } else { - if ! 
self.condition.wait() { - return Err(Error::new(EINTR)); - } + } else if !pipe.read_condition.wait(vec, "PipeRead::read") { + return Err(Error::new(EINTR)); } } } -} - -/// Read side of a pipe -pub struct PipeWrite { - scheme_id: SchemeId, - read_id: usize, - flags: AtomicUsize, - condition: Arc, - vec: Option>>> -} - -impl PipeWrite { - pub fn new(read: &PipeRead, read_id: usize, flags: usize) -> Self { - PipeWrite { - scheme_id: read.scheme_id, - read_id, - flags: AtomicUsize::new(flags), - condition: read.condition.clone(), - vec: Some(Arc::downgrade(&read.vec)), + fn kwrite( + &self, + id: usize, + user_buf: UserSliceRo, + fcntl_flags: u32, + _stored_flags: u32, + ) -> Result { + let (is_write_not_read, key) = from_raw_id(id); + + if !is_write_not_read { + return Err(Error::new(EBADF)); } - } + let pipe = Arc::clone(PIPES.read().get(&key).ok_or(Error::new(EBADF))?); - fn fcntl(&self, cmd: usize, arg: usize) -> Result { - match cmd { - F_GETFL => Ok(self.flags.load(Ordering::SeqCst)), - F_SETFL => { - self.flags.store(arg & ! O_ACCMODE, Ordering::SeqCst); - Ok(0) - }, - _ => Err(Error::new(EINVAL)) - } - } + loop { + let mut vec = pipe.queue.lock(); - fn write(&self, buf: &[u8]) -> Result { - if let Some(ref vec_weak) = self.vec { - if let Some(vec_lock) = vec_weak.upgrade() { - { - let mut vec = vec_lock.lock(); + if !pipe.reader_is_alive.load(Ordering::Relaxed) { + return Err(Error::new(EPIPE)); + } - for &b in buf.iter() { - vec.push_back(b); - } - } + let bytes_left = MAX_QUEUE_SIZE.saturating_sub(vec.len()); + let bytes_to_write = core::cmp::min(bytes_left, user_buf.len()); + let src_buf = user_buf + .limit(bytes_to_write) + .expect("bytes_to_write <= user_buf.len()"); + + const TMPBUF_SIZE: usize = 512; + let mut tmp_buf = [0_u8; TMPBUF_SIZE]; + + let mut bytes_written = 0; + + // TODO: Modify VecDeque so that the unwritten portions can be accessed directly? 
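// The user buffer is copied through a fixed 512-byte stack buffer in chunks:
// each chunk is read from userspace and appended to the queue. If a later
// chunk faults, the loop keeps what was already appended and reports a short
// write; only a fault on the very first chunk is returned as an error.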
+ for (idx, chunk) in src_buf.in_variable_chunks(TMPBUF_SIZE).enumerate() { + let chunk_byte_count = match chunk.copy_common_bytes_to_slice(&mut tmp_buf) { + Ok(c) => c, + Err(_) if idx > 0 => break, + Err(error) => return Err(error), + }; + vec.extend(&tmp_buf[..chunk_byte_count]); + bytes_written += chunk_byte_count; + } - event::trigger(self.scheme_id, self.read_id, EVENT_READ); - self.condition.notify(); + if bytes_written > 0 { + event::trigger(GlobalSchemes::Pipe.scheme_id(), key, EVENT_READ); + pipe.read_condition.notify(); - Ok(buf.len()) - } else { - Err(Error::new(EPIPE)) + return Ok(bytes_written); + } else if user_buf.is_empty() { + return Ok(0); + } + + if fcntl_flags & O_NONBLOCK as u32 != 0 { + return Err(Error::new(EAGAIN)); + } else if !pipe.write_condition.wait(vec, "PipeWrite::write") { + return Err(Error::new(EINTR)); } - } else { - panic!("PipeWrite dropped before write"); } } -} + fn kfstat(&self, _id: usize, buf: UserSliceWo) -> Result<()> { + buf.copy_exactly(&Stat { + st_mode: MODE_FIFO | 0o666, + ..Default::default() + })?; -impl Drop for PipeWrite { - fn drop(&mut self) { - drop(self.vec.take()); - event::trigger(self.scheme_id, self.read_id, EVENT_READ); - self.condition.notify(); + Ok(()) } } + +pub struct Pipe { + read_condition: WaitCondition, // signals whether there are available bytes to read + write_condition: WaitCondition, // signals whether there is room for additional bytes + queue: Mutex>, + reader_is_alive: AtomicBool, // starts set, unset when reader closes + writer_is_alive: AtomicBool, // starts set, unset when writer closes + has_run_dup: AtomicBool, +} diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 5a04f9d5..f1ce8966 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -1,479 +1,1327 @@ use crate::{ - arch::paging::VirtualAddress, - context::{self, ContextId, Status}, + arch::paging::{Page, VirtualAddress}, + context::{ + self, + context::{HardBlockedReason, SignalState}, + file::{FileDescriptor, InternalFlags}, + memory::{handle_notify_files, AddrSpace, AddrSpaceWrapper, Grant, PageSpan}, + Context, Status, + }, + memory::{get_page_info, AddRefError, RefKind, PAGE_SIZE}, ptrace, - scheme::{ATOMIC_SCHEMEID_INIT, AtomicSchemeId, SchemeId}, - syscall::validate + scheme::{self, FileHandle, KernelScheme}, + syscall::{ + data::{GrantDesc, Map, SetSighandlerData, Stat}, + error::*, + flag::*, + usercopy::{UserSliceRo, UserSliceRw, UserSliceWo}, + EnvRegisters, FloatRegisters, IntRegisters, + }, }; +use super::{CallerCtx, GlobalSchemes, KernelSchemes, OpenResult}; +use ::syscall::{ProcSchemeAttrs, SigProcControl, Sigcontrol}; use alloc::{ - collections::BTreeMap, - sync::Arc + boxed::Box, + collections::{btree_map::Entry, BTreeMap}, + string::String, + sync::{Arc, Weak}, + vec::Vec, }; +use arrayvec::ArrayString; use core::{ - cmp, - mem, - slice, - sync::atomic::{AtomicUsize, Ordering} -}; -use spin::{Mutex, RwLock}; -use syscall::{ - data::{FloatRegisters, IntRegisters, PtraceEvent}, - error::*, - flag::*, - scheme::Scheme + mem::{self, size_of}, + num::NonZeroUsize, + slice, str, + sync::atomic::{AtomicBool, AtomicUsize, Ordering}, }; +use spin::RwLock; +use spinning_top::RwSpinlock; + +fn read_from(dst: UserSliceWo, src: &[u8], offset: u64) -> Result { + let avail_src = usize::try_from(offset) + .ok() + .and_then(|o| src.get(o..)) + .unwrap_or(&[]); + dst.copy_common_bytes_from_slice(avail_src) +} + +fn try_stop_context( + context_ref: Arc>, + callback: impl FnOnce(&mut Context) -> Result, +) -> Result { + if 
context::is_current(&context_ref) { + return callback(&mut *context_ref.write()); + } + // Stop process + let (prev_status, mut running) = { + let mut context = context_ref.write(); + + ( + core::mem::replace( + &mut context.status, + context::Status::HardBlocked { + reason: HardBlockedReason::NotYetStarted, + }, + ), + context.running, + ) + }; + + // Wait until stopped + while running { + context::switch(); + + running = context_ref.read().running; + } + + let mut context = context_ref.write(); + assert!( + !context.running, + "process can't have been restarted, we stopped it!" + ); + + let ret = callback(&mut *context); + + context.status = prev_status; -#[derive(Clone, Copy)] + ret +} + +#[derive(Clone, Copy, PartialEq, Eq)] enum RegsKind { Float, - Int + Int, + Env, } -#[derive(Clone, Copy)] -enum Operation { - Memory(VirtualAddress), +#[derive(Clone)] +enum ContextHandle { + // Opened by the process manager, after which it is locked. This capability is used to open + // Attr handles, to set ens/euid/egid/pid. + Authority, + Attr, + + Status { + privileged: bool, + }, // can write ContextVerb + Regs(RegsKind), - Trace { - new_child: Option - } + Sighandler, + Start, + NewFiletable { + filetable: Arc>>>, + data: Box<[u8]>, + }, + Filetable { + filetable: Weak>>>, + data: Box<[u8]>, + }, + AddrSpace { + addrspace: Arc, + }, + CurrentAddrSpace, + + AwaitingAddrSpaceChange { + new: Arc, + new_sp: usize, + new_ip: usize, + }, + + CurrentFiletable, + + AwaitingFiletableChange { + new_ft: Arc>>>, + }, + + // TODO: Remove this once openat is implemented, or allow openat-via-dup via e.g. the top-level + // directory. + OpenViaDup, + SchedAffinity, + + MmapMinAddr(Arc), } - -#[derive(Clone, Copy)] +#[derive(Clone)] struct Handle { - flags: usize, - pid: ContextId, - operation: Operation + context: Arc>, + kind: ContextHandle, } -impl Handle { - fn continue_ignored_child(&mut self) -> Option<()> { - let pid = match self.operation { - Operation::Trace { ref mut new_child } => new_child.take()?, - _ => return None +pub struct ProcScheme; + +static NEXT_ID: AtomicUsize = AtomicUsize::new(1); +// Using BTreeMap as hashbrown doesn't have a const constructor. +static HANDLES: RwLock> = RwLock::new(BTreeMap::new()); + +#[cfg(feature = "debugger")] +#[allow(dead_code)] +pub fn foreach_addrsp(mut f: impl FnMut(&Arc)) { + for (_, handle) in HANDLES.read().iter() { + let Handle { + kind: ContextHandle::AddrSpace { addrspace, .. }, + .. 
+ } = handle + else { + continue; }; - if ptrace::is_traced(pid) { - return None; - } - let contexts = context::contexts(); - let context = contexts.get(pid)?; - let mut context = context.write(); - context.ptrace_stop = false; - Some(()) + f(&addrspace); } } -pub static PROC_SCHEME_ID: AtomicSchemeId = ATOMIC_SCHEMEID_INIT; +fn new_handle((handle, fl): (Handle, InternalFlags)) -> Result<(usize, InternalFlags)> { + let id = NEXT_ID.fetch_add(1, Ordering::Relaxed); + let _ = HANDLES.write().insert(id, handle); + Ok((id, fl)) +} -pub struct ProcScheme { - next_id: AtomicUsize, - handles: RwLock>>> +enum OpenTy { + Ctxt(Arc>), + Auth, } impl ProcScheme { - pub fn new(scheme_id: SchemeId) -> Self { - PROC_SCHEME_ID.store(scheme_id, Ordering::SeqCst); + fn openat_context( + &self, + path: &str, + context: Arc>, + ) -> Result> { + Ok(Some(match path { + "addrspace" => ( + ContextHandle::AddrSpace { + addrspace: Arc::clone( + context + .read() + .addr_space() + .map_err(|_| Error::new(ENOENT))?, + ), + }, + true, + ), + "filetable" => ( + ContextHandle::Filetable { + filetable: Arc::downgrade(&context.read().files), + data: Box::new([]), + }, + true, + ), + "current-addrspace" => (ContextHandle::CurrentAddrSpace, false), + "current-filetable" => (ContextHandle::CurrentFiletable, false), + "regs/float" => (ContextHandle::Regs(RegsKind::Float), false), + "regs/int" => (ContextHandle::Regs(RegsKind::Int), false), + "regs/env" => (ContextHandle::Regs(RegsKind::Env), false), + "sighandler" => (ContextHandle::Sighandler, false), + "start" => (ContextHandle::Start, false), + "open_via_dup" => (ContextHandle::OpenViaDup, false), + "mmap-min-addr" => ( + ContextHandle::MmapMinAddr(Arc::clone( + context + .read() + .addr_space() + .map_err(|_| Error::new(ENOENT))?, + )), + false, + ), + "sched-affinity" => (ContextHandle::SchedAffinity, true), + "status" => (ContextHandle::Status { privileged: false }, false), + _ if path.starts_with("auth-") => { + let nonprefix = &path["auth-".len()..]; + let next_dash = nonprefix.find('-').ok_or(Error::new(ENOENT))?; + let auth_fd = nonprefix[..next_dash] + .parse::() + .map_err(|_| Error::new(ENOENT))?; + let actual_name = &nonprefix[next_dash + 1..]; + + let handle = match actual_name { + "attrs" => ContextHandle::Attr, + "status" => ContextHandle::Status { privileged: true }, + _ => return Err(Error::new(ENOENT)), + }; - Self { - next_id: AtomicUsize::new(0), - handles: RwLock::new(BTreeMap::new()), - } + let (hopefully_this_scheme, number) = extract_scheme_number(auth_fd)?; + verify_scheme(hopefully_this_scheme)?; + if !matches!( + HANDLES.read().get(&number).ok_or(Error::new(ENOENT))?.kind, + ContextHandle::Authority + ) { + return Err(Error::new(ENOENT)); + } + + (handle, false) + } + _ => return Ok(None), + })) } -} + fn open_inner( + &self, + ty: OpenTy, + operation_str: Option<&str>, + flags: usize, + ) -> Result<(usize, InternalFlags)> { + let operation_name = operation_str.ok_or(Error::new(EINVAL))?; + let (mut handle, positioned) = match ty { + OpenTy::Ctxt(context) => { + if let Some((kind, positioned)) = + self.openat_context(operation_name, Arc::clone(&context))? + { + (Handle { context, kind }, positioned) + } else { + return Err(Error::new(EINVAL)); + } + } + OpenTy::Auth => { + extern "C" fn ret() {} + let context = match operation_str.ok_or(Error::new(ENOENT))? 
{ + "new-context" => { + let id = NonZeroUsize::new(NEXT_ID.fetch_add(1, Ordering::Relaxed)) + .ok_or(Error::new(EMFILE))?; + let context = context::spawn(true, Some(id), ret)?; + HANDLES.write().insert( + id.get(), + Handle { + context, + kind: ContextHandle::OpenViaDup, + }, + ); + return Ok((id.get(), InternalFlags::empty())); + } + "cur-context" => context::current(), + _ => return Err(Error::new(ENOENT)), + }; -impl Scheme for ProcScheme { - fn open(&self, path: &[u8], flags: usize, uid: u32, gid: u32) -> Result { - let path = core::str::from_utf8(path).map_err(|_| Error::new(EINVAL))?; - let mut parts = path.splitn(2, '/'); - let pid = parts.next() - .and_then(|s| s.parse().ok()) - .map(ContextId::from) - .ok_or(Error::new(EINVAL))?; - let operation = match parts.next() { - Some("mem") => Operation::Memory(VirtualAddress::new(0)), - Some("regs/float") => Operation::Regs(RegsKind::Float), - Some("regs/int") => Operation::Regs(RegsKind::Int), - Some("trace") => Operation::Trace { - new_child: None - }, - _ => return Err(Error::new(EINVAL)) + ( + Handle { + context, + kind: ContextHandle::OpenViaDup, + }, + false, + ) + } }; - let contexts = context::contexts(); - let target = contexts.get(pid).ok_or(Error::new(ESRCH))?; - { - let target = target.read(); - - if let Status::Exited(_) = target.status { - return Err(Error::new(ESRCH)); + let filetable_opt = match handle { + Handle { + kind: + ContextHandle::Filetable { + ref filetable, + ref mut data, + }, + .. + } => Some((filetable.upgrade().ok_or(Error::new(EOWNERDEAD))?, data)), + Handle { + kind: + ContextHandle::NewFiletable { + ref filetable, + ref mut data, + }, + .. + } => Some((Arc::clone(filetable), data)), + _ => None, + }; + if let Some((filetable, data)) = filetable_opt { + *data = { + use core::fmt::Write; + + let mut data = String::new(); + for index in filetable + .read() + .iter() + .enumerate() + .filter_map(|(idx, val)| val.as_ref().map(|_| idx)) + { + writeln!(data, "{}", index).unwrap(); + } + data.into_bytes().into_boxed_slice() + }; } + }; - // Unless root, check security - if uid != 0 && gid != 0 { - let current = contexts.current().ok_or(Error::new(ESRCH))?; - let current = current.read(); + let (id, int_fl) = new_handle(( + handle.clone(), + if positioned { + InternalFlags::POSITIONED + } else { + InternalFlags::empty() + }, + ))?; - // Do we own the process? - if uid != target.euid && gid != target.egid { - return Err(Error::new(EPERM)); - } + Ok((id, int_fl)) + } +} + +impl KernelScheme for ProcScheme { + fn kopen(&self, path: &str, _flags: usize, _ctx: CallerCtx) -> Result { + if path != "authority" { + return Err(Error::new(ENOENT)); + } + static LOCK: AtomicBool = AtomicBool::new(false); + if LOCK.swap(true, Ordering::Relaxed) { + return Err(Error::new(EEXIST)); + } + let id = NEXT_ID.fetch_add(1, Ordering::Relaxed); + HANDLES.write().insert( + id, + Handle { + // TODO: placeholder + context: context::current(), + kind: ContextHandle::Authority, + }, + ); + Ok(OpenResult::SchemeLocal(id, InternalFlags::empty())) + } - // Is it a subprocess of us? In the future, a capability - // could bypass this check. 
- match contexts.anchestors(target.ppid).find(|&(id, _context)| id == current.id) { - Some((id, context)) => { - // Paranoid sanity check, as ptrace security holes - // wouldn't be fun - assert_eq!(id, current.id); - assert_eq!(id, context.read().id); + fn fevent(&self, id: usize, _flags: EventFlags) -> Result { + let handles = HANDLES.read(); + let handle = handles.get(&id).ok_or(Error::new(EBADF))?; + + match handle { + _ => Ok(EventFlags::empty()), + } + } + + fn close(&self, id: usize) -> Result<()> { + let handle = HANDLES.write().remove(&id).ok_or(Error::new(EBADF))?; + + match handle { + Handle { + context, + kind: + ContextHandle::AwaitingAddrSpaceChange { + new, + new_sp, + new_ip, }, - None => return Err(Error::new(EPERM)) - } + } => { + let _ = try_stop_context(context, |context: &mut Context| { + let regs = context.regs_mut().ok_or(Error::new(EBADFD))?; + regs.set_instr_pointer(new_ip); + regs.set_stack_pointer(new_sp); + + Ok(context.set_addr_space(Some(new))) + })?; + let _ = ptrace::send_event(crate::syscall::ptrace_event!( + PTRACE_EVENT_ADDRSPACE_SWITCH, + 0 + )); } + Handle { + kind: ContextHandle::AddrSpace { addrspace } | ContextHandle::MmapMinAddr(addrspace), + .. + } => drop(addrspace), + + Handle { + kind: ContextHandle::AwaitingFiletableChange { new_ft }, + context, + } => { + context.write().files = new_ft; + } + _ => (), } + Ok(()) + } + fn kfmap( + &self, + id: usize, + dst_addr_space: &Arc, + map: &crate::syscall::data::Map, + consume: bool, + ) -> Result { + let handle = HANDLES.read().get(&id).ok_or(Error::new(EBADF))?.clone(); + let Handle { kind, ref context } = handle; + + match kind { + ContextHandle::AddrSpace { ref addrspace } => { + if Arc::ptr_eq(addrspace, dst_addr_space) { + return Err(Error::new(EBUSY)); + } - let id = self.next_id.fetch_add(1, Ordering::SeqCst); + let PageSpan { + base: requested_dst_page, + .. + } = crate::syscall::validate_region(map.address, map.size)?; + let src_span = + PageSpan::validate_nonempty(VirtualAddress::new(map.offset), map.size) + .ok_or(Error::new(EINVAL))?; + + let requested_dst_base = (map.address != 0).then_some(requested_dst_page); + + let mut src_addr_space = addrspace.acquire_write(); + + let src_page_count = NonZeroUsize::new(src_span.count).ok_or(Error::new(EINVAL))?; + + let mut notify_files = Vec::new(); + + // TODO: Validate flags + let result_base = if consume { + dst_addr_space.r#move( + Some((&addrspace, &mut *src_addr_space)), + src_span, + requested_dst_base, + src_page_count.get(), + map.flags, + &mut notify_files, + )? + } else { + let mut dst_addrsp_guard = dst_addr_space.acquire_write(); + dst_addrsp_guard.mmap( + &dst_addr_space, + requested_dst_base, + src_page_count, + map.flags, + &mut notify_files, + |dst_page, _, dst_mapper, flusher| { + Ok(Grant::borrow( + Arc::clone(addrspace), + &mut *src_addr_space, + src_span.base, + dst_page, + src_span.count, + map.flags, + dst_mapper, + flusher, + true, + true, + false, + )?) + }, + )? + }; - if let Operation::Trace { .. } = operation { - if !ptrace::try_new_session(pid, id) { - // There is no good way to handle id being occupied - // for nothing here, is there? 
- return Err(Error::new(EBUSY)); - } + handle_notify_files(notify_files); - let mut target = target.write(); - target.ptrace_stop = true; + Ok(result_base.start_address().data()) + } + ContextHandle::Sighandler => { + let context = context.read(); + let sig = context.sig.as_ref().ok_or(Error::new(EBADF))?; + let frame = match map.offset { + // tctl + 0 => &sig.thread_control, + // pctl + PAGE_SIZE => &sig.proc_control, + _ => return Err(Error::new(EINVAL)), + }; + let info = get_page_info(frame.get()).ok_or(Error::new(EBADFD))?; + match info.add_ref(RefKind::Shared) { + Ok(()) => (), + Err(AddRefError::RcOverflow) => return Err(Error::new(ENOMEM)), + Err(AddRefError::CowToShared) => unreachable!("cannot be CoW since it's a kernel RaiiFrame that at some point was made Shared"), + Err(AddRefError::SharedToCow) => unreachable!("wasn't requested"), + } + // TODO: Allocated or AllocatedShared? + let addrsp = AddrSpace::current()?; + let page = addrsp.acquire_write().mmap( + &addrsp, + None, + NonZeroUsize::new(1).unwrap(), + MapFlags::PROT_READ | MapFlags::PROT_WRITE, + &mut Vec::new(), + |page, flags, mapper, flusher| { + Grant::allocated_shared_one_page( + frame.get(), + page, + flags, + mapper, + flusher, + false, + ) + }, + )?; + Ok(page.start_address().data()) + } + _ => Err(Error::new(EBADF)), } - - self.handles.write().insert(id, Arc::new(Mutex::new(Handle { - flags, - pid, - operation - }))); - Ok(id) } - - /// Using dup for `proc:` simply opens another operation on the same PID - /// ```rust,ignore - /// let trace = syscall::open("proc:1234/trace")?; - /// - /// // let regs = syscall::open("proc:1234/regs/int")?; - /// let regs = syscall::dup(trace, "regs/int")?; - /// ``` - fn dup(&self, old_id: usize, buf: &[u8]) -> Result { + fn kreadoff( + &self, + id: usize, + buf: UserSliceWo, + offset: u64, + read_flags: u32, + _stored_flags: u32, + ) -> Result { + // Don't hold a global lock during the context switch later on let handle = { - let handles = self.handles.read(); - let handle = handles.get(&old_id).ok_or(Error::new(EBADF))?; - let handle = handle.lock(); - *handle + let handles = HANDLES.read(); + handles.get(&id).ok_or(Error::new(EBADF))?.clone() }; - let mut path = format!("{}/", handle.pid.into()).into_bytes(); - path.extend_from_slice(buf); + match handle { + Handle { context, kind } => kind.kreadoff(id, context, buf, offset), + } + } + fn kcall( + &self, + id: usize, + _payload: UserSliceRw, + _flags: CallFlags, + metadata: &[u64], + ) -> Result { + // TODO: simplify + let handle = { + let mut handles = HANDLES.write(); + let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; + handle.clone() + }; - let (uid, gid) = { - let contexts = context::contexts(); - let context = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context.read(); - (context.euid, context.egid) + let ContextHandle::OpenViaDup = handle.kind else { + return Err(Error::new(EBADF)); }; - self.open(&path, handle.flags, uid, gid) - } + let verb: u8 = (*metadata.get(0).ok_or(Error::new(EINVAL))?) 
+ .try_into() + .map_err(|_| Error::new(EINVAL))?; + let verb = ProcSchemeVerb::try_from_raw(verb).ok_or(Error::new(EINVAL))?; - fn seek(&self, id: usize, pos: usize, whence: usize) -> Result { - let handles = self.handles.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - let mut handle = handle.lock(); - - match handle.operation { - Operation::Memory(ref mut offset) => Ok({ - *offset = VirtualAddress::new(match whence { - SEEK_SET => pos, - SEEK_CUR => cmp::max(0, offset.get() as isize + pos as isize) as usize, - SEEK_END => cmp::max(0, isize::max_value() + pos as isize) as usize, - _ => return Err(Error::new(EBADF)) - }); - offset.get() - }), - _ => Err(Error::new(EBADF)) + match verb { + ProcSchemeVerb::Iopl => context::current().write().set_userspace_io_allowed(true), } + Ok(0) } + fn kwriteoff( + &self, + id: usize, + buf: UserSliceRo, + _offset: u64, + _fcntl_flags: u32, + _stored_flags: u32, + ) -> Result { + // TODO: offset - fn read(&self, id: usize, buf: &mut [u8]) -> Result { // Don't hold a global lock during the context switch later on let handle = { - let handles = self.handles.read(); - Arc::clone(handles.get(&id).ok_or(Error::new(EBADF))?) + let mut handles = HANDLES.write(); + let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; + handle.clone() }; - // TODO: Make sure handle can't deadlock - let mut handle = handle.lock(); - let pid = handle.pid; - - match handle.operation { - Operation::Memory(ref mut offset) => { - let contexts = context::contexts(); - let context = contexts.get(pid).ok_or(Error::new(ESRCH))?; - let context = context.read(); - ptrace::with_context_memory(&context, *offset, buf.len(), |ptr| { - buf.copy_from_slice(validate::validate_slice(ptr, buf.len())?); - Ok(()) - })?; + match handle { + Handle { context, kind } => kind.kwriteoff(id, context, buf), + } + } + fn kfstat(&self, id: usize, buffer: UserSliceWo) -> Result<()> { + let handles = HANDLES.read(); + let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - *offset = VirtualAddress::new(offset.get() + buf.len()); - Ok(buf.len()) - }, - Operation::Regs(kind) => { - union Output { - float: FloatRegisters, - int: IntRegisters - } - let mut first = true; - let (output, size) = loop { - if !first { - // We've tried this before, so lets wait before retrying - unsafe { context::switch(); } - } - first = false; + buffer.copy_exactly(&Stat { + st_mode: MODE_FILE | 0o666, + st_size: handle.fsize()?, - let contexts = context::contexts(); - let context = contexts.get(handle.pid).ok_or(Error::new(ESRCH))?; - let context = context.read(); + ..Stat::default() + })?; + + Ok(()) + } + + fn fsize(&self, id: usize) -> Result { + let mut handles = HANDLES.write(); + let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; - break match kind { - RegsKind::Float => { - // NOTE: The kernel will never touch floats + handle.fsize() + } - // In the rare case of not having floating - // point registers uninitiated, return - // empty everything. - let fx = context.arch.get_fx_regs().unwrap_or_default(); - (Output { float: fx }, mem::size_of::()) + /// Dup is currently used to implement clone() and execve(). 
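/// A rough usage sketch (illustrative only, not part of this change): the dup
/// argument strings come from the match arms below, while the fd names and the
/// userspace `syscall::dup` wrapper are assumed for the example.
///
/// ```rust,ignore
/// // `ctx` is an open_via_dup handle for the target context.
/// let addrspace = syscall::dup(ctx, b"addrspace")?;        // ContextHandle::AddrSpace
/// let new_space = syscall::dup(addrspace, b"exclusive")?;  // separate copy of the address space
/// let filetable = syscall::dup(ctx, b"filetable")?;        // ContextHandle::Filetable
/// let new_files = syscall::dup(filetable, b"copy")?;       // ContextHandle::NewFiletable
/// ```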
+ fn kdup(&self, old_id: usize, raw_buf: UserSliceRo, _: CallerCtx) -> Result { + let info = { + let handles = HANDLES.read(); + let handle = handles.get(&old_id).ok_or(Error::new(EBADF))?; + + handle.clone() + }; + + let handle = |h, positioned| { + ( + h, + if positioned { + InternalFlags::POSITIONED + } else { + InternalFlags::empty() + }, + ) + }; + let mut array = [0_u8; 64]; + if raw_buf.len() > array.len() { + return Err(Error::new(EINVAL)); + } + raw_buf.copy_to_slice(&mut array[..raw_buf.len()])?; + let buf = &array[..raw_buf.len()]; + + new_handle(match info { + Handle { + kind: ContextHandle::Authority, + .. + } => { + return self + .open_inner( + OpenTy::Auth, + Some(core::str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?) + .filter(|s| !s.is_empty()), + O_RDWR | O_CLOEXEC, + ) + .map(|(r, fl)| OpenResult::SchemeLocal(r, fl)) + } + Handle { + kind: ContextHandle::OpenViaDup, + context, + } => { + return self + .open_inner( + OpenTy::Ctxt(context), + Some(core::str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?) + .filter(|s| !s.is_empty()), + O_RDWR | O_CLOEXEC, + ) + .map(|(r, fl)| OpenResult::SchemeLocal(r, fl)); + } + + Handle { + kind: + ContextHandle::Filetable { + ref filetable, + ref data, + }, + context, + } => { + // TODO: Maybe allow userspace to either copy or transfer recently dupped file + // descriptors between file tables. + if buf != b"copy" { + return Err(Error::new(EINVAL)); + } + let filetable = filetable.upgrade().ok_or(Error::new(EOWNERDEAD))?; + + let new_filetable = Arc::try_new(RwLock::new(filetable.read().clone())) + .map_err(|_| Error::new(ENOMEM))?; + + handle( + Handle { + kind: ContextHandle::NewFiletable { + filetable: new_filetable, + data: data.clone(), }, - RegsKind::Int => match unsafe { ptrace::regs_for(&context) } { - None => { - // Another CPU is running this process, wait until it's stopped. - continue; - }, - Some(stack) => { - let mut regs = IntRegisters::default(); + context, + }, + true, + ) + } + Handle { + kind: ContextHandle::AddrSpace { ref addrspace }, + context, + } => { + const GRANT_FD_PREFIX: &[u8] = b"grant-fd-"; + + let kind = match buf { + // TODO: Better way to obtain new empty address spaces, perhaps using SYS_OPEN. But + // in that case, what scheme? + b"empty" => ContextHandle::AddrSpace { + addrspace: AddrSpaceWrapper::new()?, + }, + b"exclusive" => ContextHandle::AddrSpace { + addrspace: addrspace.try_clone()?, + }, + b"mmap-min-addr" => ContextHandle::MmapMinAddr(Arc::clone(addrspace)), + + _ if buf.starts_with(GRANT_FD_PREFIX) => { + let string = core::str::from_utf8(&buf[GRANT_FD_PREFIX.len()..]) + .map_err(|_| Error::new(EINVAL))?; + let page_addr = + usize::from_str_radix(string, 16).map_err(|_| Error::new(EINVAL))?; - stack.save(&mut regs); + if page_addr % PAGE_SIZE != 0 { + return Err(Error::new(EINVAL)); + } - (Output { int: regs }, mem::size_of::()) + let page = Page::containing_address(VirtualAddress::new(page_addr)); + + match addrspace + .acquire_read() + .grants + .contains(page) + .ok_or(Error::new(EINVAL))? + { + (_, info) => { + return Ok(OpenResult::External( + info.file_ref() + .map(|r| Arc::clone(&r.description)) + .ok_or(Error::new(EBADF))?, + )) } } - }; - }; + } - let bytes = unsafe { - slice::from_raw_parts(&output as *const _ as *const u8, mem::size_of::()) + _ => return Err(Error::new(EINVAL)), }; - let len = cmp::min(buf.len(), size); - buf[..len].copy_from_slice(&bytes[..len]); - Ok(len) - }, - Operation::Trace { .. 
} => { - let read = ptrace::recv_events(handle.pid, unsafe { - slice::from_raw_parts_mut( - buf.as_mut_ptr() as *mut PtraceEvent, - buf.len() / mem::size_of::() - ) - }).unwrap_or(0); - - Ok(read * mem::size_of::()) + handle(Handle { context, kind }, true) } + _ => return Err(Error::new(EINVAL)), + }) + .map(|(r, fl)| OpenResult::SchemeLocal(r, fl)) + } +} +fn extract_scheme_number(fd: usize) -> Result<(KernelSchemes, usize)> { + let (scheme_id, number) = match &*context::current() + .read() + .get_file(FileHandle::from(fd)) + .ok_or(Error::new(EBADF))? + .description + .read() + { + desc => (desc.scheme, desc.number), + }; + let scheme = scheme::schemes() + .get(scheme_id) + .ok_or(Error::new(ENODEV))? + .clone(); + + Ok((scheme, number)) +} +fn verify_scheme(scheme: KernelSchemes) -> Result<()> { + if !matches!(scheme, KernelSchemes::Global(GlobalSchemes::Proc)) { + return Err(Error::new(EBADF)); + } + Ok(()) +} +impl Handle { + fn fsize(&self) -> Result { + match self.kind { + ContextHandle::Filetable { ref data, .. } + | ContextHandle::NewFiletable { ref data, .. } => Ok(data.len() as u64), + _ => Ok(0), } } +} +impl ContextHandle { + fn kwriteoff( + self, + id: usize, + context: Arc>, + buf: UserSliceRo, + ) -> Result { + match self { + Self::AddrSpace { addrspace } => { + let mut chunks = buf.usizes(); + let mut words_read = 0; + let mut next = || { + words_read += 1; + chunks.next().ok_or(Error::new(EINVAL)) + }; - fn write(&self, id: usize, buf: &[u8]) -> Result { - // Don't hold a global lock during the context switch later on - let handle = { - let handles = self.handles.read(); - Arc::clone(handles.get(&id).ok_or(Error::new(EBADF))?) - }; - let mut handle = handle.lock(); - handle.continue_ignored_child(); - - // Some operations borrow Operation:: mutably - let pid = handle.pid; - let flags = handle.flags; - - let mut first = true; - match handle.operation { - Operation::Memory(ref mut offset) => { - let contexts = context::contexts(); - let context = contexts.get(pid).ok_or(Error::new(ESRCH))?; - let context = context.read(); + match next()?? 
{ + op @ ADDRSPACE_OP_MMAP | op @ ADDRSPACE_OP_TRANSFER => { + let fd = next()??; + let offset = next()??; + let page_span = crate::syscall::validate_region(next()??, next()??)?; + let flags = MapFlags::from_bits(next()??).ok_or(Error::new(EINVAL))?; - ptrace::with_context_memory(&context, *offset, buf.len(), |ptr| { - validate::validate_slice_mut(ptr, buf.len())?.copy_from_slice(buf); - Ok(()) - })?; + if !flags.contains(MapFlags::MAP_FIXED) { + return Err(Error::new(EOPNOTSUPP)); + } - *offset = VirtualAddress::new(offset.get() + buf.len()); - Ok(buf.len()) - }, - Operation::Regs(kind) => loop { - if !first { - // We've tried this before, so lets wait before retrying - unsafe { context::switch(); } - } - first = false; + let (scheme, number) = extract_scheme_number(fd)?; - let contexts = context::contexts(); - let context = contexts.get(handle.pid).ok_or(Error::new(ESRCH))?; - let mut context = context.write(); + scheme.kfmap( + number, + &addrspace, + &Map { + offset, + size: page_span.count * PAGE_SIZE, + address: page_span.base.start_address().data(), + flags, + }, + op == ADDRSPACE_OP_TRANSFER, + )?; + } + ADDRSPACE_OP_MUNMAP => { + let page_span = crate::syscall::validate_region(next()??, next()??)?; - break match kind { - RegsKind::Float => { - if buf.len() < mem::size_of::() { - return Ok(0); - } - let regs = unsafe { - *(buf as *const _ as *const FloatRegisters) - }; + let unpin = false; + addrspace.munmap(page_span, unpin)?; + } + ADDRSPACE_OP_MPROTECT => { + let page_span = crate::syscall::validate_region(next()??, next()??)?; + let flags = MapFlags::from_bits(next()??).ok_or(Error::new(EINVAL))?; + + addrspace.mprotect(page_span, flags)?; + } + _ => return Err(Error::new(EINVAL)), + } + Ok(words_read * mem::size_of::()) + } + ContextHandle::Regs(kind) => match kind { + RegsKind::Float => { + let regs = unsafe { buf.read_exact::()? }; + try_stop_context(context, |context| { // NOTE: The kernel will never touch floats // Ignore the rare case of floating point // registers being uninitiated - let _ = context.arch.set_fx_regs(regs); + let _ = context.set_fx_regs(regs); Ok(mem::size_of::()) - }, - RegsKind::Int => match unsafe { ptrace::regs_for_mut(&mut context) } { + }) + } + RegsKind::Int => { + let regs = unsafe { buf.read_exact::()? }; + + try_stop_context(context, |context| match context.regs_mut() { None => { - // Another CPU is running this process, wait until it's stopped. - continue; - }, + println!( + "{}:{}: Couldn't read registers from stopped process", + file!(), + line!() + ); + Err(Error::new(ENOTRECOVERABLE)) + } Some(stack) => { - if buf.len() < mem::size_of::() { - return Ok(0); - } - let regs = unsafe { - *(buf as *const _ as *const IntRegisters) - }; - stack.load(®s); Ok(mem::size_of::()) } - } - }; + }) + } + RegsKind::Env => { + let regs = unsafe { buf.read_exact::()? }; + write_env_regs(context, regs)?; + Ok(mem::size_of::()) + } }, - Operation::Trace { ref mut new_child } => { - if buf.len() < 1 { - return Ok(0); + ContextHandle::Sighandler => { + let data = unsafe { buf.read_exact::()? 
}; + + if data.user_handler >= crate::USER_END_OFFSET + || data.excp_handler >= crate::USER_END_OFFSET + { + return Err(Error::new(EPERM)); + } + if data.thread_control_addr >= crate::USER_END_OFFSET + || data.proc_control_addr >= crate::USER_END_OFFSET + { + return Err(Error::new(EFAULT)); } - let op = buf[0]; - let mut blocking = flags & O_NONBLOCK != O_NONBLOCK; - let mut singlestep = false; + let state = if data.thread_control_addr != 0 && data.proc_control_addr != 0 { + let validate_off = |addr, sz| { + let off = addr % PAGE_SIZE; + if off % mem::align_of::() == 0 && off + sz <= PAGE_SIZE { + Ok(off as u16) + } else { + Err(Error::new(EINVAL)) + } + }; - match op & PTRACE_OPERATIONMASK { - PTRACE_CONT => { ptrace::cont(pid); }, - PTRACE_SYSCALL | PTRACE_SINGLESTEP | PTRACE_SIGNAL => { // <- not a bitwise OR - singlestep = op & PTRACE_OPERATIONMASK == PTRACE_SINGLESTEP; - ptrace::set_breakpoint(pid, op); - }, - PTRACE_WAIT => blocking = true, - _ => return Err(Error::new(EINVAL)) + let addrsp = Arc::clone(context.read().addr_space()?); + + Some(SignalState { + threadctl_off: validate_off( + data.thread_control_addr, + mem::size_of::(), + )?, + procctl_off: validate_off( + data.proc_control_addr, + mem::size_of::(), + )?, + user_handler: NonZeroUsize::new(data.user_handler) + .ok_or(Error::new(EINVAL))?, + excp_handler: NonZeroUsize::new(data.excp_handler), + thread_control: addrsp.borrow_frame_enforce_rw_allocated( + Page::containing_address(VirtualAddress::new(data.thread_control_addr)), + )?, + proc_control: addrsp.borrow_frame_enforce_rw_allocated( + Page::containing_address(VirtualAddress::new(data.proc_control_addr)), + )?, + }) + } else { + None + }; + + context.write().sig = state; + + Ok(mem::size_of::()) + } + ContextHandle::Start => match context.write().status { + ref mut status @ Status::HardBlocked { + reason: HardBlockedReason::NotYetStarted, + } => { + *status = Status::Runnable; + Ok(buf.len()) } + _ => return Err(Error::new(EINVAL)), + }, + ContextHandle::Filetable { .. } | ContextHandle::NewFiletable { .. } => { + Err(Error::new(EBADF)) + } - let mut first = true; - loop { - if !first { - // We've tried this before, so lets wait before retrying - unsafe { context::switch(); } - } - first = false; + ContextHandle::CurrentFiletable => { + let filetable_fd = buf.read_usize()?; + let (hopefully_this_scheme, number) = extract_scheme_number(filetable_fd)?; + verify_scheme(hopefully_this_scheme)?; - let contexts = context::contexts(); - let context = contexts.get(pid).ok_or(Error::new(ESRCH))?; - let mut context = context.write(); - if let Status::Exited(_) = context.status { - return Err(Error::new(ESRCH)); + let mut handles = HANDLES.write(); + let Entry::Occupied(mut entry) = handles.entry(number) else { + return Err(Error::new(EBADF)); + }; + let filetable = match *entry.get_mut() { + Handle { + kind: ContextHandle::Filetable { ref filetable, .. }, + .. + } => filetable.upgrade().ok_or(Error::new(EOWNERDEAD))?, + Handle { + kind: + ContextHandle::NewFiletable { + ref filetable, + ref data, + }, + .. + } => { + let ft = Arc::clone(&filetable); + *entry.get_mut() = Handle { + kind: ContextHandle::Filetable { + filetable: Arc::downgrade(&filetable), + data: data.clone(), + }, + context: Arc::clone(&context), + }; + ft } - if singlestep { - match unsafe { ptrace::regs_for_mut(&mut context) } { - None => continue, - Some(stack) => stack.set_singlestep(true) - } - } + _ => return Err(Error::new(EBADF)), + }; + + *handles.get_mut(&id).ok_or(Error::new(EBADF))? 
= Handle { + kind: ContextHandle::AwaitingFiletableChange { new_ft: filetable }, + context, + }; - context.ptrace_stop = false; - break; + Ok(mem::size_of::()) + } + ContextHandle::CurrentAddrSpace { .. } => { + let mut iter = buf.usizes(); + let addrspace_fd = iter.next().ok_or(Error::new(EINVAL))??; + let sp = iter.next().ok_or(Error::new(EINVAL))??; + let ip = iter.next().ok_or(Error::new(EINVAL))??; + + let (hopefully_this_scheme, number) = extract_scheme_number(addrspace_fd)?; + verify_scheme(hopefully_this_scheme)?; + + let mut handles = HANDLES.write(); + let Handle { + kind: ContextHandle::AddrSpace { ref addrspace }, + .. + } = handles.get(&number).ok_or(Error::new(EBADF))? + else { + return Err(Error::new(EBADF)); + }; + + *handles.get_mut(&id).ok_or(Error::new(EBADF))? = Handle { + context, + kind: Self::AwaitingAddrSpaceChange { + new: Arc::clone(addrspace), + new_sp: sp, + new_ip: ip, + }, + }; + + Ok(3 * mem::size_of::()) + } + Self::MmapMinAddr(ref addrspace) => { + let val = buf.read_usize()?; + if val % PAGE_SIZE != 0 || val > crate::USER_END_OFFSET { + return Err(Error::new(EINVAL)); } + addrspace.acquire_write().mmap_min = val; + Ok(mem::size_of::()) + } + Self::SchedAffinity => { + let mask = unsafe { buf.read_exact::()? }; + + context.write().sched_affinity.override_from(&mask); + + Ok(mem::size_of_val(&mask)) + } + ContextHandle::Status { privileged } => { + let mut args = buf.usizes(); - if blocking { - if let Some(event) = ptrace::wait(pid)? { - if event.tag == PTRACE_EVENT_CLONE { - *new_child = Some(ContextId::from(unsafe { event.data.clone })); + let user_data = args.next().ok_or(Error::new(EINVAL))??; + + let context_verb = + ContextVerb::try_from_raw(user_data).ok_or(Error::new(EINVAL))?; + + match context_verb { + // TODO: lwp_park/lwp_unpark for bypassing procmgr? + ContextVerb::Unstop | ContextVerb::Stop if !privileged => { + return Err(Error::new(EPERM)) + } + ContextVerb::Stop => { + let mut guard = context.write(); + + match guard.status { + Status::Dead { .. } => return Err(Error::new(EOWNERDEAD)), + Status::HardBlocked { + reason: HardBlockedReason::AwaitingMmap { .. }, + } => todo!(), + _ => (), + } + guard.status = Status::HardBlocked { + reason: HardBlockedReason::Stopped, + }; + // TODO: wait for context to be switched away from, and/or IPI? + Ok(size_of::()) + } + ContextVerb::Unstop => { + let mut guard = context.write(); + + if let Status::HardBlocked { + reason: HardBlockedReason::Stopped, + } = guard.status + { + guard.status = Status::Runnable; + } + Ok(size_of::()) + } + ContextVerb::Interrupt => { + let mut guard = context.write(); + guard.unblock(); + Ok(size_of::()) + } + ContextVerb::ForceKill => { + if context::is_current(&context) { + //log::trace!("FORCEKILL SELF {} {}", context.read().debug_id, context.read().pid); + + // The following functionality simplifies the cleanup step when detached threads + // terminate. 
+ if let Some(post_unmap) = args.next() { + let base = post_unmap?; + let size = args.next().ok_or(Error::new(EINVAL))??; + + if size > 0 { + let addrsp = Arc::clone(context.read().addr_space()?); + let res = addrsp.munmap( + PageSpan::validate_nonempty( + VirtualAddress::new(base), + size, + ) + .ok_or(Error::new(EINVAL))?, + false, + )?; + for r in res { + let _ = r.unmap(); + } + } + } + crate::syscall::exit_this_context(None); + } else { + let mut ctxt = context.write(); + //log::trace!("FORCEKILL NONSELF={} {}, SELF={}", ctxt.debug_id, ctxt.pid, context::current().read().debug_id); + ctxt.status = context::Status::Runnable; + ctxt.being_sigkilled = true; + Ok(mem::size_of::()) } - return Ok(0); } } - - Ok(1) } + ContextHandle::Attr => { + let info = unsafe { buf.read_exact::()? }; + let mut guard = context.write(); + + let len = info + .debug_name + .iter() + .position(|c| *c == 0) + .unwrap_or(info.debug_name.len()) + .min(guard.name.capacity()); + let debug_name = core::str::from_utf8(&info.debug_name[..len]) + .map_err(|_| Error::new(EINVAL))?; + guard.name.clear(); + guard.name.push_str(debug_name); + + guard.pid = info.pid as usize; + guard.ens = (info.ens as usize).into(); + guard.euid = info.euid; + guard.egid = info.egid; + Ok(size_of::()) + } + _ => Err(Error::new(EBADF)), } } + fn kreadoff( + &self, + _id: usize, + context: Arc>, + buf: UserSliceWo, + offset: u64, + ) -> Result { + match self { + ContextHandle::Regs(kind) => { + union Output { + float: FloatRegisters, + int: IntRegisters, + env: EnvRegisters, + } - fn fcntl(&self, id: usize, cmd: usize, arg: usize) -> Result { - let handles = self.handles.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - let mut handle = handle.lock(); - - match cmd { - F_SETFL => { handle.flags = arg; Ok(0) }, - F_GETFL => return Ok(handle.flags), - _ => return Err(Error::new(EINVAL)) - } - } + let (output, size) = match kind { + RegsKind::Float => { + let context = context.read(); + // NOTE: The kernel will never touch floats - fn fevent(&self, id: usize, _flags: usize) -> Result { - let handles = self.handles.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - let handle = handle.lock(); + ( + Output { + float: context.get_fx_regs(), + }, + mem::size_of::(), + ) + } + RegsKind::Int => try_stop_context(context, |context| match context.regs() { + None => { + assert!(!context.running, "try_stop_context is broken, clearly"); + println!( + "{}:{}: Couldn't read registers from stopped process", + file!(), + line!() + ); + Err(Error::new(ENOTRECOVERABLE)) + } + Some(stack) => { + let mut regs = IntRegisters::default(); + stack.save(&mut regs); + Ok((Output { int: regs }, mem::size_of::())) + } + })?, + RegsKind::Env => ( + Output { + env: read_env_regs(context)?, + }, + mem::size_of::(), + ), + }; - Ok(ptrace::session_fevent_flags(handle.pid).expect("proc (fevent): invalid session")) - } + let src_buf = + unsafe { slice::from_raw_parts(&output as *const _ as *const u8, size) }; - fn fpath(&self, id: usize, buf: &mut [u8]) -> Result { - let handles = self.handles.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - let handle = handle.lock(); + buf.copy_common_bytes_from_slice(src_buf) + } + ContextHandle::AddrSpace { ref addrspace } => { + let Ok(offset) = usize::try_from(offset) else { + return Ok(0); + }; + let grants_to_skip = offset / mem::size_of::(); + + // Output a list of grant descriptors, sufficient to allow relibc's fork() + // implementation to fmap MAP_SHARED grants. 
+ let mut grants_read = 0; + + let mut dst = [GrantDesc::default(); 16]; + + for (dst, (grant_base, grant_info)) in dst + .iter_mut() + .zip(addrspace.acquire_read().grants.iter().skip(grants_to_skip)) + { + *dst = GrantDesc { + base: grant_base.start_address().data(), + size: grant_info.page_count() * PAGE_SIZE, + flags: grant_info.grant_flags(), + // The !0 is not a sentinel value; the availability of `offset` is + // indicated by the GRANT_SCHEME flag. + offset: grant_info.file_ref().map_or(!0, |f| f.base_offset as u64), + }; + grants_read += 1; + } + for (src, chunk) in dst + .iter() + .take(grants_read) + .zip(buf.in_exact_chunks(mem::size_of::())) + { + chunk.copy_exactly(src)?; + } - let path = format!("proc:{}/{}", handle.pid.into(), match handle.operation { - Operation::Memory(_) => "mem", - Operation::Regs(RegsKind::Float) => "regs/float", - Operation::Regs(RegsKind::Int) => "regs/int", - Operation::Trace { .. } => "trace" - }); + Ok(grants_read * mem::size_of::()) + } - let len = cmp::min(path.len(), buf.len()); - buf[..len].copy_from_slice(&path.as_bytes()[..len]); + ContextHandle::Filetable { data, .. } => read_from(buf, &data, offset), + ContextHandle::MmapMinAddr(ref addrspace) => { + buf.write_usize(addrspace.acquire_read().mmap_min)?; + Ok(mem::size_of::()) + } + ContextHandle::SchedAffinity => { + let mask = context.read().sched_affinity.to_raw(); + + buf.copy_exactly(crate::cpu_set::mask_as_bytes(&mask))?; + Ok(mem::size_of_val(&mask)) + } // TODO: Replace write() with SYS_SENDFD? + ContextHandle::Status { .. } => { + let status = { + let context = context.read(); + match context.status { + Status::Dead { excp: None } => ContextStatus::Dead, + Status::Runnable if context.being_sigkilled => ContextStatus::ForceKilled, + Status::Dead { excp: Some(excp) } => { + let (status, payload) = + buf.split_at(size_of::()).ok_or(Error::new(EINVAL))?; + status.copy_from_slice( + &(ContextStatus::UnhandledExcp as usize).to_ne_bytes(), + )?; + let len = payload.copy_common_bytes_from_slice(&excp)?; + return Ok(size_of::() + len); + } + Status::Runnable => ContextStatus::Runnable, + Status::Blocked => ContextStatus::Blocked, + Status::HardBlocked { + reason: HardBlockedReason::NotYetStarted, + } => ContextStatus::NotYetStarted, + Status::HardBlocked { + reason: HardBlockedReason::Stopped, + } => ContextStatus::Stopped, + _ => ContextStatus::Other, + } + }; + buf.copy_common_bytes_from_slice(&(status as usize).to_ne_bytes()) + } + ContextHandle::Attr => { + let mut debug_name = [0; 32]; + let (euid, egid, ens, pid, name) = match context.read() { + ref c => (c.euid, c.egid, c.ens.get() as u32, c.pid as u32, c.name), + }; + let min = name.len().min(debug_name.len()); + debug_name[..min].copy_from_slice(&name.as_bytes()[..min]); + buf.copy_common_bytes_from_slice(&ProcSchemeAttrs { + pid, + euid, + egid, + ens, + debug_name, + }) + } + ContextHandle::Sighandler => { + let data = match context.read().sig { + Some(ref sig) => SetSighandlerData { + excp_handler: sig.excp_handler.map_or(0, NonZeroUsize::get), + user_handler: sig.user_handler.get(), + proc_control_addr: sig.procctl_off.into(), + thread_control_addr: sig.threadctl_off.into(), + }, + None => SetSighandlerData::default(), + }; + buf.copy_common_bytes_from_slice(&data) + } - Ok(len) + // TODO: Find a better way to switch address spaces, since they also require switching + // the instruction and stack pointer. 
Maybe remove `/regs` altogether and replace it + // with `/ctx` + _ => return Err(Error::new(EBADF)), + } } +} - fn close(&self, id: usize) -> Result { - let handle = self.handles.write().remove(&id).ok_or(Error::new(EBADF))?; - let mut handle = handle.lock(); - handle.continue_ignored_child(); - - if let Operation::Trace { .. } = handle.operation { - ptrace::close_session(handle.pid); - } +fn write_env_regs(context: Arc>, regs: EnvRegisters) -> Result<()> { + if context::is_current(&context) { + context::current().write().write_current_env_regs(regs) + } else { + try_stop_context(context, |context| context.write_env_regs(regs)) + } +} - let contexts = context::contexts(); - if let Some(context) = contexts.get(handle.pid) { - let mut context = context.write(); - context.ptrace_stop = false; - } - Ok(0) +fn read_env_regs(context: Arc>) -> Result { + if context::is_current(&context) { + context::current().read().read_current_env_regs() + } else { + try_stop_context(context, |context| context.read_env_regs()) } } diff --git a/src/scheme/root.rs b/src/scheme/root.rs index 13f90891..603a392b 100644 --- a/src/scheme/root.rs +++ b/src/scheme/root.rs @@ -1,172 +1,161 @@ -use alloc::sync::Arc; -use alloc::boxed::Box; -use alloc::collections::BTreeMap; -use alloc::vec::Vec; -use core::{cmp, str}; -use core::sync::atomic::{AtomicUsize, Ordering}; -use spin::{Mutex, RwLock}; - -use crate::context; -use crate::syscall::data::Stat; -use crate::syscall::error::*; -use crate::syscall::flag::{O_CREAT, MODE_FILE, MODE_DIR, SEEK_SET, SEEK_CUR, SEEK_END}; -use crate::syscall::scheme::Scheme; -use crate::scheme::{self, SchemeNamespace, SchemeId}; -use crate::scheme::user::{UserInner, UserScheme}; - -struct FolderInner { - data: Box<[u8]>, - pos: Mutex -} - -impl FolderInner { - fn read(&self, buf: &mut [u8]) -> Result { - let mut i = 0; - let mut pos = self.pos.lock(); - - while i < buf.len() && *pos < self.data.len() { - buf[i] = self.data[*pos]; - i += 1; - *pos += 1; - } - - Ok(i) - } - - fn seek(&self, pos: usize, whence: usize) -> Result { - let mut seek = self.pos.lock(); - *seek = match whence { - SEEK_SET => cmp::min(self.data.len(), pos), - SEEK_CUR => cmp::max(0, cmp::min(self.data.len() as isize, *seek as isize + pos as isize)) as usize, - SEEK_END => cmp::max(0, cmp::min(self.data.len() as isize, self.data.len() as isize + pos as isize)) as usize, - _ => return Err(Error::new(EINVAL)) - }; - - Ok(*seek) - } -} +use alloc::{boxed::Box, string::ToString, sync::Arc}; +use core::{ + str, + sync::atomic::{AtomicUsize, Ordering}, +}; +use hashbrown::HashMap; +use spin::RwLock; +use syscall::{ + dirent::{DirEntry, DirentBuf, DirentKind}, + O_EXLOCK, O_FSYNC, +}; + +use crate::{ + context::{self, file::InternalFlags}, + scheme::{ + self, + user::{UserInner, UserScheme}, + SchemeId, SchemeNamespace, + }, + syscall::{ + data::Stat, + error::*, + flag::{EventFlags, MODE_DIR, MODE_FILE, O_CREAT}, + usercopy::{UserSliceRo, UserSliceWo}, + }, +}; + +use super::{CallerCtx, KernelScheme, KernelSchemes, OpenResult}; #[derive(Clone)] enum Handle { Scheme(Arc), File(Arc>), - Folder(Arc) + List { ens: SchemeNamespace }, } pub struct RootScheme { scheme_ns: SchemeNamespace, scheme_id: SchemeId, next_id: AtomicUsize, - handles: RwLock>, + handles: RwLock>, } impl RootScheme { pub fn new(scheme_ns: SchemeNamespace, scheme_id: SchemeId) -> RootScheme { RootScheme { - scheme_ns: scheme_ns, - scheme_id: scheme_id, + scheme_ns, + scheme_id, next_id: AtomicUsize::new(0), - handles: RwLock::new(BTreeMap::new()), + handles: 
RwLock::new(HashMap::new()), } } } -impl Scheme for RootScheme { - fn open(&self, path: &[u8], flags: usize, uid: u32, _gid: u32) -> Result { - let path_utf8 = str::from_utf8(path).or(Err(Error::new(ENOENT)))?; - let path_trimmed = path_utf8.trim_matches('/'); +impl KernelScheme for RootScheme { + fn kopen(&self, path: &str, flags: usize, ctx: CallerCtx) -> Result { + let path = path.trim_start_matches('/'); //TODO: Make this follow standards for flags and errors if flags & O_CREAT == O_CREAT { - if uid == 0 { - let context = { - let contexts = context::contexts(); - let context = contexts.current().ok_or(Error::new(ESRCH))?; - Arc::downgrade(&context) - }; - - let id = self.next_id.fetch_add(1, Ordering::SeqCst); - - let inner = { - let path_box = path_trimmed.as_bytes().to_vec().into_boxed_slice(); - let mut schemes = scheme::schemes_mut(); - let inner = Arc::new(UserInner::new(self.scheme_id, id, path_box.clone(), flags, context)); - schemes.insert(self.scheme_ns, path_box, |scheme_id| { - inner.scheme_id.store(scheme_id, Ordering::SeqCst); - Arc::new(Box::new(UserScheme::new(Arc::downgrade(&inner)))) - })?; - inner - }; - - self.handles.write().insert(id, Handle::Scheme(inner)); + if ctx.uid != 0 { + return Err(Error::new(EACCES)); + }; - Ok(id) - } else { - Err(Error::new(EACCES)) + if path.contains('/') { + return Err(Error::new(EINVAL)); } - } else if path_trimmed.is_empty() { - let scheme_ns = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - context.ens - }; - let mut data = Vec::new(); - { - let schemes = scheme::schemes(); - for (name, _scheme_id) in schemes.iter_name(scheme_ns) { - data.extend_from_slice(name); - data.push(b'\n'); + let context = Arc::downgrade(&context::current()); + + let id = self.next_id.fetch_add(1, Ordering::Relaxed); + + let inner = { + let path_box = path.to_string().into_boxed_str(); + let mut schemes = scheme::schemes_mut(); + + let v2 = flags & O_FSYNC == O_FSYNC; + let new_close = flags & O_EXLOCK == O_EXLOCK; + + if !v2 { + //log::warn!("Context {} opened a v1 scheme", context::current().read().name); } - } + if !new_close { + /*log::warn!( + "Context {} opened a non-async-close scheme", + context::current().read().name + );*/ + } + + let (_scheme_id, inner) = + schemes.insert_and_pass(self.scheme_ns, path, |scheme_id| { + let inner = Arc::new(UserInner::new( + self.scheme_id, + scheme_id, + // TODO: This is a hack, but eventually the legacy interface will be + // removed. 
+ v2, + new_close, + id, + path_box, + flags, + context, + )); + ( + KernelSchemes::User(UserScheme::new(Arc::downgrade(&inner))), + inner, + ) + })?; + + inner + }; - let inner = Arc::new(FolderInner { - data: data.into_boxed_slice(), - pos: Mutex::new(0) - }); + self.handles.write().insert(id, Handle::Scheme(inner)); - let id = self.next_id.fetch_add(1, Ordering::SeqCst); - self.handles.write().insert(id, Handle::Folder(inner)); - Ok(id) + Ok(OpenResult::SchemeLocal(id, InternalFlags::empty())) + } else if path.is_empty() { + let ens = context::current().read().ens; + + let id = self.next_id.fetch_add(1, Ordering::Relaxed); + self.handles.write().insert(id, Handle::List { ens }); + Ok(OpenResult::SchemeLocal(id, InternalFlags::POSITIONED)) } else { - let inner = Arc::new( - path_trimmed.as_bytes().to_vec().into_boxed_slice() - ); + let inner = Arc::new(path.as_bytes().to_vec().into_boxed_slice()); - let id = self.next_id.fetch_add(1, Ordering::SeqCst); + let id = self.next_id.fetch_add(1, Ordering::Relaxed); self.handles.write().insert(id, Handle::File(inner)); - Ok(id) + Ok(OpenResult::SchemeLocal(id, InternalFlags::POSITIONED)) } } - fn unlink(&self, path: &[u8], uid: u32, _gid: u32) -> Result { - let path_utf8 = str::from_utf8(path).or(Err(Error::new(ENOENT)))?; - let path_trimmed = path_utf8.trim_matches('/'); + fn unlink(&self, path: &str, ctx: CallerCtx) -> Result<()> { + let path = path.trim_matches('/'); - if uid == 0 { - let inner = { - let handles = self.handles.read(); - handles.iter().find_map(|(_id, handle)| { + if ctx.uid != 0 { + return Err(Error::new(EACCES)); + } + let inner = { + let handles = self.handles.read(); + handles + .iter() + .find_map(|(_id, handle)| { match handle { Handle::Scheme(inner) => { - if path_trimmed.as_bytes() == inner.name.as_ref() { + if path == inner.name.as_ref() { return Some(inner.clone()); } - }, + } _ => (), } None - }).ok_or(Error::new(ENOENT))? - }; + }) + .ok_or(Error::new(ENOENT))? + }; - inner.unmount() - } else { - Err(Error::new(EACCES)) - } + inner.unmount() } - fn read(&self, file: usize, buf: &mut [u8]) -> Result { + fn fsize(&self, file: usize) -> Result { let handle = { let handles = self.handles.read(); let handle = handles.get(&file).ok_or(Error::new(EBADF))?; @@ -174,19 +163,13 @@ impl Scheme for RootScheme { }; match handle { - Handle::Scheme(inner) => { - inner.read(buf) - }, - Handle::File(_) => { - Err(Error::new(EBADF)) - }, - Handle::Folder(inner) => { - inner.read(buf) - } + Handle::Scheme(_) => Err(Error::new(EBADF)), + Handle::File(_) => Err(Error::new(EBADF)), + Handle::List { .. } => Ok(0), } } - fn write(&self, file: usize, buf: &[u8]) -> Result { + fn fevent(&self, file: usize, flags: EventFlags) -> Result { let handle = { let handles = self.handles.read(); let handle = handles.get(&file).ok_or(Error::new(EBADF))?; @@ -194,96 +177,122 @@ impl Scheme for RootScheme { }; match handle { - Handle::Scheme(inner) => { - inner.write(buf) - }, - Handle::File(_) => { - Err(Error::new(EBADF)) - }, - Handle::Folder(_) => { - Err(Error::new(EBADF)) - } + Handle::Scheme(inner) => inner.fevent(flags), + Handle::File(_) => Err(Error::new(EBADF)), + Handle::List { .. 
} => Err(Error::new(EBADF)), } } - fn seek(&self, file: usize, pos: usize, whence: usize) -> Result { + fn kfpath(&self, file: usize, mut buf: UserSliceWo) -> Result { let handle = { let handles = self.handles.read(); let handle = handles.get(&file).ok_or(Error::new(EBADF))?; handle.clone() }; + let mut bytes_copied = buf.copy_common_bytes_from_slice(b":")?; + buf = buf.advance(bytes_copied).ok_or(Error::new(EINVAL))?; + match handle { - Handle::Scheme(_) => { - Err(Error::new(EBADF)) - }, - Handle::File(_) => { - Err(Error::new(EBADF)) - }, - Handle::Folder(inner) => { - inner.seek(pos, whence) + Handle::Scheme(inner) => { + bytes_copied += buf.copy_common_bytes_from_slice(inner.name.as_bytes())?; } + Handle::File(inner) => { + bytes_copied += buf.copy_common_bytes_from_slice(&inner)?; + } + Handle::List { .. } => (), } + + Ok(bytes_copied) } - fn fevent(&self, file: usize, flags: usize) -> Result { + fn fsync(&self, file: usize) -> Result<()> { let handle = { let handles = self.handles.read(); let handle = handles.get(&file).ok_or(Error::new(EBADF))?; handle.clone() }; + match handle { + Handle::Scheme(inner) => inner.fsync(), + Handle::File(_) => Err(Error::new(EBADF)), + Handle::List { .. } => Err(Error::new(EBADF)), + } + } + + fn close(&self, file: usize) -> Result<()> { + let handle = self + .handles + .write() + .remove(&file) + .ok_or(Error::new(EBADF))?; match handle { Handle::Scheme(inner) => { - inner.fevent(flags) - }, - Handle::File(_) => { - Err(Error::new(EBADF)) - }, - Handle::Folder(_) => { - Err(Error::new(EBADF)) + scheme::schemes_mut().remove(inner.scheme_id); } + _ => (), } + Ok(()) } - - fn fpath(&self, file: usize, buf: &mut [u8]) -> Result { + fn kreadoff( + &self, + file: usize, + buf: UserSliceWo, + _offset: u64, + flags: u32, + _stored_flags: u32, + ) -> Result { let handle = { let handles = self.handles.read(); let handle = handles.get(&file).ok_or(Error::new(EBADF))?; handle.clone() }; - let mut i = 0; - let scheme_path = b":"; - while i < buf.len() && i < scheme_path.len() { - buf[i] = scheme_path[i]; - i += 1; + match handle { + Handle::Scheme(inner) => inner.read(buf, flags), + Handle::File(_) => Err(Error::new(EBADF)), + Handle::List { .. } => Err(Error::new(EISDIR)), } + } + fn getdents( + &self, + id: usize, + buf: UserSliceWo, + header_size: u16, + opaque: u64, + ) -> Result { + let Handle::List { ens } = *self.handles.read().get(&id).ok_or(Error::new(EBADF))? 
else { + return Err(Error::new(ENOTDIR)); + }; - match handle { - Handle::Scheme(inner) => { - let mut j = 0; - while i < buf.len() && j < inner.name.len() { - buf[i] = inner.name[j]; - i += 1; - j += 1; - } - }, - Handle::File(inner) => { - let mut j = 0; - while i < buf.len() && j < inner.len() { - buf[i] = inner[j]; - i += 1; - j += 1; - } - }, - Handle::Folder(_) => () + let mut buf = DirentBuf::new(buf, header_size).ok_or(Error::new(EIO))?; + { + let schemes = scheme::schemes(); + for (i, (name, _)) in schemes + .iter_name(ens) + .enumerate() + .skip_while(|(i, _)| (*i as u64) < opaque) + .filter(|(_, (name, _))| !name.is_empty()) + { + buf.entry(DirEntry { + kind: DirentKind::Unspecified, + name, + inode: 0, + next_opaque_id: i as u64 + 1, + })?; + } } - Ok(i) + Ok(buf.finalize()) } - fn fstat(&self, file: usize, stat: &mut Stat) -> Result { + fn kwrite( + &self, + file: usize, + buf: UserSliceRo, + _flags: u32, + _stored_flags: u32, + ) -> Result { let handle = { let handles = self.handles.read(); let handle = handles.get(&file).ok_or(Error::new(EBADF))?; @@ -291,59 +300,34 @@ impl Scheme for RootScheme { }; match handle { - Handle::Scheme(_) => { - stat.st_mode = MODE_FILE; - stat.st_uid = 0; - stat.st_gid = 0; - stat.st_size = 0; - }, - Handle::File(_) => { - stat.st_mode = MODE_FILE; - stat.st_uid = 0; - stat.st_gid = 0; - stat.st_size = 0; - }, - Handle::Folder(inner) => { - stat.st_mode = MODE_DIR; - stat.st_uid = 0; - stat.st_gid = 0; - stat.st_size = inner.data.len() as u64; - } + Handle::Scheme(inner) => inner.write(buf), + Handle::File(_) => Err(Error::new(EBADF)), + Handle::List { .. } => Err(Error::new(EISDIR)), } - - Ok(0) } - fn fsync(&self, file: usize) -> Result { + fn kfstat(&self, file: usize, buf: UserSliceWo) -> Result<()> { let handle = { let handles = self.handles.read(); let handle = handles.get(&file).ok_or(Error::new(EBADF))?; handle.clone() }; - match handle { - Handle::Scheme(inner) => { - inner.fsync() + buf.copy_exactly(&match handle { + Handle::Scheme(_) => Stat { + st_mode: MODE_FILE, + ..Default::default() }, - Handle::File(_) => { - Err(Error::new(EBADF)) + Handle::File(_) => Stat { + st_mode: MODE_FILE, + ..Default::default() }, - Handle::Folder(_) => { - Err(Error::new(EBADF)) - } - } - } - - fn close(&self, file: usize) -> Result { - let handle = self.handles.write().remove(&file).ok_or(Error::new(EBADF))?; - match handle { - Handle::Scheme(inner) => { - let scheme_id = inner.scheme_id.load(Ordering::SeqCst); - let mut schemes = scheme::schemes_mut(); - schemes.remove(scheme_id); + Handle::List { .. } => Stat { + st_mode: MODE_DIR, + ..Default::default() }, - _ => () - } - Ok(0) + })?; + + Ok(()) } } diff --git a/src/scheme/serio.rs b/src/scheme/serio.rs new file mode 100644 index 00000000..191f367f --- /dev/null +++ b/src/scheme/serio.rs @@ -0,0 +1,113 @@ +//! PS/2 unfortunately requires a kernel driver to prevent race conditions due +//! to how status is utilized +use core::{ + str, + sync::atomic::{AtomicUsize, Ordering}, +}; + +use spin::RwLock; + +use crate::{ + event, + scheme::*, + sync::WaitQueue, + syscall::{ + flag::{EventFlags, EVENT_READ, O_NONBLOCK}, + usercopy::UserSliceWo, + }, +}; + +static NEXT_ID: AtomicUsize = AtomicUsize::new(0); + +/// Input queue +static INPUT: [WaitQueue; 2] = [WaitQueue::new(), WaitQueue::new()]; + +#[derive(Clone, Copy)] +struct Handle { + index: usize, +} + +// Using BTreeMap as hashbrown doesn't have a const constructor. 
+static HANDLES: RwLock> = RwLock::new(BTreeMap::new()); + +/// Add to the input queue +pub fn serio_input(index: usize, data: u8) { + #[cfg(feature = "profiling")] + crate::profiling::serio_command(index, data); + + INPUT[index].send(data); + + for (id, _handle) in HANDLES.read().iter() { + event::trigger(GlobalSchemes::Serio.scheme_id(), *id, EVENT_READ); + } +} + +pub struct SerioScheme; + +impl KernelScheme for SerioScheme { + fn kopen(&self, path: &str, _flags: usize, ctx: CallerCtx) -> Result { + if ctx.uid != 0 { + return Err(Error::new(EPERM)); + } + + let index = path.parse::().or(Err(Error::new(ENOENT)))?; + if index >= INPUT.len() { + return Err(Error::new(ENOENT)); + } + + let id = NEXT_ID.fetch_add(1, Ordering::Relaxed); + HANDLES.write().insert(id, Handle { index }); + + Ok(OpenResult::SchemeLocal(id, InternalFlags::empty())) + } + + fn fevent(&self, id: usize, _flags: EventFlags) -> Result { + let _handle = { + let handles = HANDLES.read(); + *handles.get(&id).ok_or(Error::new(EBADF))? + }; + + Ok(EventFlags::empty()) + } + + fn fsync(&self, id: usize) -> Result<()> { + let _handle = { + let handles = HANDLES.read(); + *handles.get(&id).ok_or(Error::new(EBADF))? + }; + + Ok(()) + } + + /// Close the file `number` + fn close(&self, id: usize) -> Result<()> { + let _handle = { + let mut handles = HANDLES.write(); + handles.remove(&id).ok_or(Error::new(EBADF))? + }; + + Ok(()) + } + fn kread(&self, id: usize, buf: UserSliceWo, flags: u32, _stored_flags: u32) -> Result { + let handle = { + let handles = HANDLES.read(); + *handles.get(&id).ok_or(Error::new(EBADF))? + }; + + INPUT[handle.index].receive_into_user( + buf, + flags & O_NONBLOCK as u32 == 0, + "SerioScheme::read", + ) + } + + fn kfpath(&self, id: usize, buf: UserSliceWo) -> Result { + let handle = { + let handles = HANDLES.read(); + *handles.get(&id).ok_or(Error::new(EBADF))? 
+ }; + let path = format!("serio:{}", handle.index).into_bytes(); + + buf.copy_common_bytes_from_slice(&path) + } +} diff --git a/src/scheme/sys/block.rs b/src/scheme/sys/block.rs new file mode 100644 index 00000000..54fe5b19 --- /dev/null +++ b/src/scheme/sys/block.rs @@ -0,0 +1,32 @@ +use alloc::{string::String, vec::Vec}; +use core::fmt::Write; + +use crate::{context, syscall::error::Result}; + +pub fn resource() -> Result> { + let mut string = String::new(); + + { + let mut rows = Vec::new(); + { + let contexts = context::contexts(); + for context_lock in contexts.iter().filter_map(|r| r.upgrade()) { + let context = context_lock.read(); + rows.push((context.pid, context.name.clone(), context.status_reason)); + } + } + + for row in rows.iter() { + let id: usize = row.0.into(); + let name = &row.1; + + let _ = writeln!(string, "{}: {}", id, name); + + if !row.2.is_empty() { + let _ = writeln!(string, " {}", row.2); + } + } + } + + Ok(string.into_bytes()) +} diff --git a/src/scheme/sys/context.rs b/src/scheme/sys/context.rs index c25f40dd..91f74b9c 100644 --- a/src/scheme/sys/context.rs +++ b/src/scheme/sys/context.rs @@ -1,49 +1,44 @@ -use alloc::string::String; -use alloc::vec::Vec; -use core::str; +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; -use crate::context; -use crate::syscall::error::Result; +use crate::{context, paging::PAGE_SIZE, syscall::error::Result}; pub fn resource() -> Result> { - let mut string = format!("{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<8}{}\n", - "PID", - "PGID", - "PPID", - "RUID", - "RGID", - "RNS", - "EUID", - "EGID", - "ENS", - "STAT", - "CPU", - "MEM", - "NAME"); + let mut string = format!( + "{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<11}{:<12}{:<8}{}\n", + "PID", "EUID", "EGID", "ENS", "STAT", "CPU", "AFFINITY", "TIME", "MEM", "NAME" + ); { let contexts = context::contexts(); - for (_id, context_lock) in contexts.iter() { - let context = context_lock.read(); + for context_ref in contexts.iter().filter_map(|r| r.upgrade()) { + let context = context_ref.read(); let mut stat_string = String::new(); - if context.stack.is_some() { - stat_string.push('U'); + // TODO: All user programs must have some grant in order for executable memory to even + // exist, but is this a good indicator of whether it is user or kernel? + stat_string.push(if let Ok(addr_space) = context.addr_space() { + if addr_space.acquire_read().grants.is_empty() { + 'K' + } else { + 'U' + } } else { - stat_string.push('K'); - } + 'R' + }); match context.status { context::Status::Runnable => { stat_string.push('R'); - }, - context::Status::Blocked => if context.wake.is_some() { - stat_string.push('S'); - } else { - stat_string.push('B'); - }, - context::Status::Stopped(_sig) => { - stat_string.push('T'); } - context::Status::Exited(_status) => { + context::Status::Blocked | context::Status::HardBlocked { .. } => { + if context.wake.is_some() { + stat_string.push('S'); + } else { + stat_string.push('B'); + } + } + context::Status::Dead { .. 
} => { stat_string.push('Z'); } } @@ -56,31 +51,29 @@ pub fn resource() -> Result> { } else { format!("?") }; + let affinity = context.sched_affinity.to_string(); - let mut memory = 0; - if let Some(ref kfx) = context.kstack { - memory += kfx.len(); - } + let cpu_time_s = context.cpu_time / crate::time::NANOS_PER_SEC; + let cpu_time_ns = context.cpu_time % crate::time::NANOS_PER_SEC; + let cpu_time_string = format!( + "{:02}:{:02}:{:02}.{:02}", + cpu_time_s / 3600, + (cpu_time_s / 60) % 60, + cpu_time_s % 60, + cpu_time_ns / 10_000_000 + ); + + let mut memory = context.kfx.len(); if let Some(ref kstack) = context.kstack { memory += kstack.len(); } - for shared_mem in context.image.iter() { - shared_mem.with(|mem| { - memory += mem.size(); - }); - } - if let Some(ref heap) = context.heap { - heap.with(|heap| { - memory += heap.size(); - }); - } - if let Some(ref stack) = context.stack { - stack.with(|stack| { - memory += stack.size(); - }); - } - if let Some(ref sigstack) = context.sigstack { - memory += sigstack.size(); + if let Ok(addr_space) = context.addr_space() { + for (_base, info) in addr_space.acquire_read().grants.iter() { + // TODO: method + if matches!(info.provider, context::memory::Provider::Allocated { .. }) { + memory += info.page_count() * PAGE_SIZE; + } + } } let memory_string = if memory >= 1024 * 1024 * 1024 { @@ -93,23 +86,19 @@ pub fn resource() -> Result> { format!("{} B", memory) }; - let name_bytes = context.name.lock(); - let name = str::from_utf8(&name_bytes).unwrap_or(""); - - string.push_str(&format!("{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<8}{}\n", - context.id.into(), - context.pgid.into(), - context.ppid.into(), - context.ruid, - context.rgid, - context.rns.into(), - context.euid, - context.egid, - context.ens.into(), - stat_string, - cpu_string, - memory_string, - name)); + string.push_str(&format!( + "{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<11}{:<12}{:<8}{}\n", + context.pid, + context.euid, + context.egid, + context.ens.get(), + stat_string, + cpu_string, + affinity, + cpu_time_string, + memory_string, + context.name, + )); } } diff --git a/src/scheme/sys/cpu.rs b/src/scheme/sys/cpu.rs index 01854c81..f9f2c672 100644 --- a/src/scheme/sys/cpu.rs +++ b/src/scheme/sys/cpu.rs @@ -1,13 +1,15 @@ use alloc::vec::Vec; -use crate::device::cpu::cpu_info; -use crate::syscall::error::{Error, EIO, Result}; +use crate::{ + device::cpu::cpu_info, + syscall::error::{Error, Result, EIO}, +}; pub fn resource() -> Result> { let mut string = format!("CPUs: {}\n", crate::cpu_count()); match cpu_info(&mut string) { Ok(()) => Ok(string.into_bytes()), - Err(_) => Err(Error::new(EIO)) + Err(_) => Err(Error::new(EIO)), } } diff --git a/src/scheme/sys/exe.rs b/src/scheme/sys/exe.rs index 00849ace..1b543900 100644 --- a/src/scheme/sys/exe.rs +++ b/src/scheme/sys/exe.rs @@ -1,16 +1,7 @@ use alloc::vec::Vec; -use crate::context; -use crate::syscall::error::{Error, ESRCH, Result}; +use crate::{context, syscall::error::Result}; pub fn resource() -> Result> { - let mut name = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - let name = context.name.lock(); - name.clone().into_vec() - }; - name.push(b'\n'); - Ok(name) + Ok(context::current().read().name.as_bytes().to_vec()) } diff --git a/src/scheme/sys/fdstat.rs b/src/scheme/sys/fdstat.rs new file mode 100644 index 00000000..5667137f --- /dev/null +++ b/src/scheme/sys/fdstat.rs @@ -0,0 +1,98 @@ +use crate::{ + context, + 
context::{file::FileDescription, memory::AddrSpaceWrapper}, + scheme, + syscall::error::Result, +}; +use alloc::{boxed::Box, string::String, sync::Arc, vec::Vec}; +use core::{fmt::Write, hash::Hash}; +use hashbrown::HashMap; +use spin::RwLock; + +pub fn resource() -> Result> { + #[derive(Debug)] + struct Ref(Arc); + impl Hash for Ref { + fn hash(&self, state: &mut H) { + state.write_usize(Arc::as_ptr(&self.0) as usize); + } + } + impl PartialEq for Ref { + fn eq(&self, other: &Self) -> bool { + Arc::as_ptr(&self.0) == Arc::as_ptr(&other.0) + } + } + impl Eq for Ref {} + #[derive(Default)] + struct Descr { + owners: HashMap, String>, + scheme: Box, + } + let mut map = HashMap::>, Descr>::new(); + + let mut report = String::new(); + 'contexts: for context in context::contexts().iter().filter_map(|c| c.upgrade()) { + let context = context.read(); + let files = context.files.read(); + writeln!(report, "'{}' {{", context.name).unwrap(); + + for file in files.iter().filter_map(|f| f.clone()) { + writeln!( + report, + "\tS{}W{}", + Arc::strong_count(&file.description), + Arc::weak_count(&file.description) + ) + .unwrap(); + let fr = Ref(file.description.clone()); + let Some(a) = context.addr_space.clone() else { + continue 'contexts; + }; + let descr = map.entry(fr).or_default(); + + let scheme_id = file.description.read().scheme; + let scheme = scheme::schemes() + .names + .iter() + .flat_map(|(_, v)| v.iter()) + .find_map(|(name, id)| { + if *id == scheme_id { + Some(name.clone()) + } else { + None + } + }); + descr + .owners + .entry(Ref(a)) + .or_insert(context.name.clone().into_owned()); + descr.scheme = scheme.unwrap_or(Box::from("[unknown]")); + } + writeln!(report, "}}").unwrap(); + } + writeln!(report, "==========").unwrap(); + let mut singletons = 0; + for (fr, ma) in map.iter() { + if ma.owners.len() == 1 { + singletons += 1; + } + writeln!( + report, + "{:p}: {:?}; {}", + fr.0, + ma.owners.values().cloned().collect::>(), + ma.scheme + ) + .unwrap(); + } + writeln!(report, "==========").unwrap(); + writeln!( + report, + "{} singletons out of {} total", + singletons, + map.len() + ) + .unwrap(); + + Ok(report.into()) +} diff --git a/src/scheme/sys/iostat.rs b/src/scheme/sys/iostat.rs index e0663873..39bc6f09 100644 --- a/src/scheme/sys/iostat.rs +++ b/src/scheme/sys/iostat.rs @@ -1,11 +1,6 @@ -use alloc::string::String; -use alloc::vec::Vec; +use crate::{context, scheme, syscall::error::Result}; +use alloc::{string::String, vec::Vec}; use core::fmt::Write; -use core::str; - -use crate::context; -use crate::scheme; -use crate::syscall::error::Result; pub fn resource() -> Result> { let mut string = String::new(); @@ -14,46 +9,57 @@ pub fn resource() -> Result> { let mut rows = Vec::new(); { let contexts = context::contexts(); - for (id, context_lock) in contexts.iter() { - let context = context_lock.read(); - rows.push((*id, context.name.lock().clone(), context.files.lock().clone())); + for context_ref in contexts.iter().filter_map(|r| r.upgrade()) { + let context = context_ref.read(); + rows.push(( + context.debug_id, + context.name.clone(), + context.files.read().clone(), + )); } } - for row in rows.iter() { - let id: usize = row.0.into(); - let name = str::from_utf8(&row.1).unwrap_or("."); + for (id, name, fs) in rows.iter() { let _ = writeln!(string, "{}: {}", id, name); - for (fd, f) in row.2.iter().enumerate() { + for (fd, f) in fs.iter().enumerate() { let file = match *f { None => continue, - Some(ref file) => file.clone() + Some(ref file) => file.clone(), }; let description = 
file.description.read(); - let scheme = { + let _scheme = { let schemes = scheme::schemes(); match schemes.get(description.scheme) { Some(scheme) => scheme.clone(), None => { - let _ = writeln!(string, " {:>4}: {:>8} {:>8} {:>08X}: no scheme", fd, description.scheme.into(), description.number, description.flags); + let _ = writeln!( + string, + " {:>4}: {:>8} {:>8} {:>08X}: no scheme", + fd, + description.scheme.get(), + description.number, + description.flags + ); continue; } } }; + /* let mut fpath = [0; 4096]; match scheme.fpath(description.number, &mut fpath) { Ok(path_len) => { let fname = str::from_utf8(&fpath[..path_len]).unwrap_or("?"); - let _ = writeln!(string, "{:>6}: {:>8} {:>8} {:>08X}: {}", fd, description.scheme.into(), description.number, description.flags, fname); + let _ = writeln!(string, "{:>6}: {:>8} {:>8} {:>08X}: {}", fd, description.scheme.get(), description.number, description.flags, fname); }, Err(err) => { - let _ = writeln!(string, "{:>6}: {:>8} {:>8} {:>08X}: {}", fd, description.scheme.into(), description.number, description.flags, err); + let _ = writeln!(string, "{:>6}: {:>8} {:>8} {:>08X}: {}", fd, description.scheme.get(), description.number, description.flags, err); } } + */ } } } diff --git a/src/scheme/sys/irq.rs b/src/scheme/sys/irq.rs new file mode 100644 index 00000000..655dc1b4 --- /dev/null +++ b/src/scheme/sys/irq.rs @@ -0,0 +1,17 @@ +use alloc::{string::String, vec::Vec}; +use core::fmt::Write; + +use crate::syscall::error::Result; + +pub fn resource() -> Result> { + let mut string = String::new(); + + { + let counts = crate::scheme::irq::COUNTS.lock(); + for (i, count) in counts.iter().enumerate() { + let _ = writeln!(string, "{}: {}", i, count); + } + } + + Ok(string.into_bytes()) +} diff --git a/src/scheme/sys/log.rs b/src/scheme/sys/log.rs index c8b8efd4..709e4a11 100644 --- a/src/scheme/sys/log.rs +++ b/src/scheme/sys/log.rs @@ -1,7 +1,6 @@ use alloc::vec::Vec; -use crate::log::LOG; -use crate::syscall::error::Result; +use crate::{log::LOG, syscall::error::Result}; pub fn resource() -> Result> { let mut vec = Vec::new(); diff --git a/src/scheme/sys/mod.rs b/src/scheme/sys/mod.rs index ee995117..57a3b691 100644 --- a/src/scheme/sys/mod.rs +++ b/src/scheme/sys/mod.rs @@ -1,170 +1,278 @@ -use alloc::boxed::Box; -use alloc::collections::BTreeMap; -use alloc::vec::Vec; -use core::{cmp, str}; -use core::sync::atomic::{AtomicUsize, Ordering}; +// TODO: This scheme can be simplified significantly, and through it, several other APIs where it's +// dubious whether they require dedicated schemes (like irq, dtb, acpi). In particular, the kernel +// could abandon the filesystem-like APIs here in favor of SYS_CALL, and instead let userspace wrap +// those to say shell-accessible fs-like APIs. 
+ +use ::syscall::{ + dirent::{DirEntry, DirentBuf, DirentKind}, + EBADFD, EINVAL, EIO, EISDIR, ENOTDIR, EPERM, +}; +use alloc::{collections::BTreeMap, vec::Vec}; +use core::{ + str, + sync::atomic::{AtomicUsize, Ordering}, +}; use spin::RwLock; -use crate::syscall::data::Stat; -use crate::syscall::error::{Error, EBADF, EINVAL, ENOENT, Result}; -use crate::syscall::flag::{MODE_DIR, MODE_FILE, SEEK_CUR, SEEK_END, SEEK_SET}; -use crate::syscall::scheme::Scheme; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use crate::arch::interrupt; +use crate::{ + context::file::InternalFlags, + syscall::{ + data::Stat, + error::{Error, Result, EBADF, ENOENT}, + flag::{MODE_DIR, MODE_FILE}, + usercopy::{UserSliceRo, UserSliceWo}, + }, +}; + +use super::{CallerCtx, KernelScheme, OpenResult}; +mod block; mod context; mod cpu; + +#[cfg(feature = "sys_fdstat")] +mod fdstat; + mod exe; mod iostat; +mod irq; mod log; mod scheme; mod scheme_num; mod syscall; mod uname; -struct Handle { - path: &'static [u8], - data: Vec, - mode: u16, - seek: usize -} - -type SysFn = Fn() -> Result> + Send + Sync; +#[cfg(feature = "sys_stat")] +mod stat; -/// System information scheme -pub struct SysScheme { - next_id: AtomicUsize, - files: BTreeMap<&'static [u8], Box>, - handles: RwLock> +enum Handle { + TopLevel, + Resource { + path: &'static str, + data: Option>, + }, } -impl SysScheme { - pub fn new() -> SysScheme { - let mut files: BTreeMap<&'static [u8], Box> = BTreeMap::new(); - - files.insert(b"context", Box::new(move || context::resource())); - files.insert(b"cpu", Box::new(move || cpu::resource())); - files.insert(b"exe", Box::new(move || exe::resource())); - files.insert(b"iostat", Box::new(move || iostat::resource())); - files.insert(b"log", Box::new(move || log::resource())); - files.insert(b"scheme", Box::new(move || scheme::resource())); - files.insert(b"scheme_num", Box::new(move || scheme_num::resource())); - files.insert(b"syscall", Box::new(move || syscall::resource())); - files.insert(b"uname", Box::new(move || uname::resource())); - - SysScheme { - next_id: AtomicUsize::new(0), - files: files, - handles: RwLock::new(BTreeMap::new()) - } - } +enum Kind { + Rd(fn() -> Result>), + Wr(fn(&[u8]) -> Result), } +use Kind::*; -impl Scheme for SysScheme { - fn open(&self, path: &[u8], _flags: usize, _uid: u32, _gid: u32) -> Result { - let path_utf8 = str::from_utf8(path).or(Err(Error::new(ENOENT)))?; - let path_trimmed = path_utf8.trim_matches('/'); +/// System information scheme +pub struct SysScheme; +static NEXT_ID: AtomicUsize = AtomicUsize::new(1); +// Using BTreeMap as hashbrown doesn't have a const constructor. +static HANDLES: RwLock> = RwLock::new(BTreeMap::new()); - if path_trimmed.is_empty() { - let mut data = Vec::new(); - for entry in self.files.iter() { - if ! 
data.is_empty() { - data.push(b'\n'); - } - data.extend_from_slice(entry.0); +const FILES: &[(&'static str, Kind)] = &[ + ("block", Rd(block::resource)), + ("context", Rd(context::resource)), + ("cpu", Rd(cpu::resource)), + #[cfg(feature = "sys_fdstat")] + ("fdstat", Rd(fdstat::resource)), + ("exe", Rd(exe::resource)), + ("iostat", Rd(iostat::resource)), + ("irq", Rd(irq::resource)), + ("log", Rd(log::resource)), + ("scheme", Rd(scheme::resource)), + ("scheme_num", Rd(scheme_num::resource)), + ("syscall", Rd(syscall::resource)), + ("uname", Rd(uname::resource)), + ("env", Rd(|| Ok(Vec::from(crate::init_env())))), + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + ("spurious_irq", Rd(interrupt::irq::spurious_irq_resource)), + #[cfg(feature = "sys_stat")] + ("stat", Rd(stat::resource)), + // Disabled because the debugger is inherently unsafe and probably will break the system. + /* + ("trigger_debugger", Rd(|| unsafe { + crate::debugger::debugger(None); + Ok(Vec::new()) + })), + */ + ( + "update_time_offset", + Wr(crate::time::sys_update_time_offset), + ), + ( + "kstop", + Wr(|arg| unsafe { + match arg.trim_ascii() { + b"shutdown" => crate::stop::kstop(), + b"reset" => crate::stop::kreset(), + b"emergency_reset" => crate::stop::emergency_reset(), + _ => Err(Error::new(EINVAL)), } + }), + ), +]; - let id = self.next_id.fetch_add(1, Ordering::SeqCst); - self.handles.write().insert(id, Handle { - path: b"", - data: data, - mode: MODE_DIR | 0o444, - seek: 0 - }); - return Ok(id) +impl KernelScheme for SysScheme { + fn kopen(&self, path: &str, _flags: usize, ctx: CallerCtx) -> Result { + let path = path.trim_matches('/'); + + if path.is_empty() { + let id = NEXT_ID.fetch_add(1, Ordering::Relaxed); + + HANDLES.write().insert(id, Handle::TopLevel); + + Ok(OpenResult::SchemeLocal(id, InternalFlags::POSITIONED)) } else { //Have to iterate to get the path without allocation - for entry in self.files.iter() { - if entry.0 == &path_trimmed.as_bytes() { - let id = self.next_id.fetch_add(1, Ordering::SeqCst); - self.handles.write().insert(id, Handle { - path: entry.0, - data: entry.1()?, - mode: MODE_FILE | 0o444, - seek: 0 - }); - return Ok(id) - } + let entry = FILES + .iter() + .find(|(entry_path, _)| *entry_path == path) + .ok_or(Error::new(ENOENT))?; + + if matches!(entry.1, Wr(_)) && ctx.uid != 0 { + return Err(Error::new(EPERM)); } - } - Err(Error::new(ENOENT)) + let id = NEXT_ID.fetch_add(1, Ordering::Relaxed); + let data = match entry.1 { + Rd(r) => Some(r()?), + Wr(_) => None, + }; + HANDLES.write().insert( + id, + Handle::Resource { + path: entry.0, + data, + }, + ); + Ok(OpenResult::SchemeLocal(id, InternalFlags::POSITIONED)) + } } - fn read(&self, id: usize, buffer: &mut [u8]) -> Result { - let mut handles = self.handles.write(); - let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; - - let mut i = 0; - while i < buffer.len() && handle.seek < handle.data.len() { - buffer[i] = handle.data[handle.seek]; - i += 1; - handle.seek += 1; + fn fsize(&self, id: usize) -> Result { + match HANDLES.read().get(&id).ok_or(Error::new(EBADF))? { + Handle::TopLevel => Ok(0), + Handle::Resource { data, .. } => Ok(data.as_ref().map_or(0, |d| d.len() as u64)), } + } - Ok(i) + fn close(&self, id: usize) -> Result<()> { + HANDLES.write().remove(&id).ok_or(Error::new(EBADF))?; + Ok(()) } + fn kfpath(&self, id: usize, buf: UserSliceWo) -> Result { + let handles = HANDLES.read(); + let path = match handles.get(&id).ok_or(Error::new(EBADF))? { + Handle::TopLevel => "", + Handle::Resource { path, .. 
} => path, + }; - fn seek(&self, id: usize, pos: usize, whence: usize) -> Result { - let mut handles = self.handles.write(); - let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; + const FIRST: &[u8] = b"sys:"; + let mut bytes_read = buf.copy_common_bytes_from_slice(FIRST)?; - handle.seek = match whence { - SEEK_SET => cmp::min(handle.data.len(), pos), - SEEK_CUR => cmp::max(0, cmp::min(handle.data.len() as isize, handle.seek as isize + pos as isize)) as usize, - SEEK_END => cmp::max(0, cmp::min(handle.data.len() as isize, handle.data.len() as isize + pos as isize)) as usize, - _ => return Err(Error::new(EINVAL)) - }; + if let Some(remaining) = buf.advance(FIRST.len()) { + bytes_read += remaining.copy_common_bytes_from_slice(path.as_bytes())?; + } - Ok(handle.seek) + Ok(bytes_read) } + fn kreadoff( + &self, + id: usize, + buffer: UserSliceWo, + pos: u64, + _flags: u32, + _stored_flags: u32, + ) -> Result { + let Ok(pos) = usize::try_from(pos) else { + return Ok(0); + }; - fn fpath(&self, id: usize, buf: &mut [u8]) -> Result { - let handles = self.handles.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; + match HANDLES.read().get(&id).ok_or(Error::new(EBADF))? { + Handle::TopLevel | Handle::Resource { data: None, .. } => { + return Err(Error::new(EISDIR)) + } + Handle::Resource { + data: Some(ref data), + .. + } => { + let avail_buf = data.get(pos..).unwrap_or(&[]); - let mut i = 0; - let scheme_path = b"sys:"; - while i < buf.len() && i < scheme_path.len() { - buf[i] = scheme_path[i]; - i += 1; + buffer.copy_common_bytes_from_slice(avail_buf) + } } - - let mut j = 0; - while i < buf.len() && j < handle.path.len() { - buf[i] = handle.path[j]; - i += 1; - j += 1; + } + fn kwriteoff( + &self, + id: usize, + buffer: UserSliceRo, + _pos: u64, + _flags: u32, + _stored_flags: u32, + ) -> Result { + match HANDLES.read().get(&id).ok_or(Error::new(EBADF))? { + Handle::TopLevel | Handle::Resource { data: Some(_), .. } => { + return Err(Error::new(EISDIR)) + } + Handle::Resource { data: None, path } => { + let mut intermediate = [0_u8; 256]; + let len = buffer.copy_common_bytes_to_slice(&mut intermediate)?; + let (_, Wr(handler)) = FILES + .iter() + .find(|(entry_path, _)| entry_path == path) + .ok_or(Error::new(EBADFD))? + else { + return Err(Error::new(EBADFD))?; + }; + handler(&intermediate[..len]) + } } - - Ok(i) } - - fn fstat(&self, id: usize, stat: &mut Stat) -> Result { - let handles = self.handles.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - - stat.st_mode = handle.mode; - stat.st_uid = 0; - stat.st_gid = 0; - stat.st_size = handle.data.len() as u64; - - Ok(0) + fn getdents( + &self, + id: usize, + buf: UserSliceWo, + header_size: u16, + first_index: u64, + ) -> Result { + let Ok(first_index) = usize::try_from(first_index) else { + return Ok(0); + }; + match HANDLES.read().get(&id).ok_or(Error::new(EBADF))? { + Handle::Resource { .. } => return Err(Error::new(ENOTDIR)), + Handle::TopLevel => { + let mut buf = DirentBuf::new(buf, header_size).ok_or(Error::new(EIO))?; + for (this_idx, (name, _)) in FILES.iter().enumerate().skip(first_index) { + buf.entry(DirEntry { + inode: this_idx as u64, + next_opaque_id: this_idx as u64 + 1, + kind: DirentKind::Regular, + name, + })?; + } + Ok(buf.finalize()) + } + } } - fn fsync(&self, _id: usize) -> Result { - Ok(0) - } + fn kfstat(&self, id: usize, buf: UserSliceWo) -> Result<()> { + let stat = match HANDLES.read().get(&id).ok_or(Error::new(EBADF))? { + Handle::Resource { data, .. 
} => Stat { + st_mode: 0o666 | MODE_FILE, + st_uid: 0, + st_gid: 0, + st_size: data.as_ref().map_or(0, |d| d.len() as u64), + ..Default::default() + }, + Handle::TopLevel => Stat { + st_mode: 0o444 | MODE_DIR, + st_uid: 0, + st_gid: 0, + st_size: 0, + ..Default::default() + }, + }; + + buf.copy_exactly(&stat)?; - fn close(&self, id: usize) -> Result { - self.handles.write().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) + Ok(()) } } diff --git a/src/scheme/sys/scheme.rs b/src/scheme/sys/scheme.rs index f97c6e34..f27f337e 100644 --- a/src/scheme/sys/scheme.rs +++ b/src/scheme/sys/scheme.rs @@ -1,22 +1,15 @@ use alloc::vec::Vec; -use crate::context; -use crate::scheme; -use crate::syscall::error::{Error, ESRCH, Result}; +use crate::{context, scheme, syscall::error::Result}; pub fn resource() -> Result> { - let scheme_ns = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - context.ens - }; + let scheme_ns = context::current().read().ens; let mut data = Vec::new(); let schemes = scheme::schemes(); for (name, _scheme_id) in schemes.iter_name(scheme_ns) { - data.extend_from_slice(name); + data.extend_from_slice(name.as_bytes()); data.push(b'\n'); } diff --git a/src/scheme/sys/scheme_num.rs b/src/scheme/sys/scheme_num.rs index fbd85ac4..1409268a 100644 --- a/src/scheme/sys/scheme_num.rs +++ b/src/scheme/sys/scheme_num.rs @@ -1,23 +1,16 @@ use alloc::vec::Vec; -use crate::context; -use crate::scheme; -use crate::syscall::error::{Error, ESRCH, Result}; +use crate::{context, scheme, syscall::error::Result}; pub fn resource() -> Result> { - let scheme_ns = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - context.ens - }; + let scheme_ns = context::current().read().ens; let mut data = Vec::new(); let schemes = scheme::schemes(); for (name, &scheme_id) in schemes.iter_name(scheme_ns) { - data.extend_from_slice(format!("{:>4}: ", scheme_id.into()).as_bytes()); - data.extend_from_slice(name); + data.extend_from_slice(format!("{:>4}: ", scheme_id.get()).as_bytes()); + data.extend_from_slice(name.as_bytes()); data.push(b'\n'); } diff --git a/src/scheme/sys/stat.rs b/src/scheme/sys/stat.rs new file mode 100644 index 00000000..170d115d --- /dev/null +++ b/src/scheme/sys/stat.rs @@ -0,0 +1,89 @@ +use crate::{ + context::{contexts, ContextRef, Status}, + cpu_stats::{get_context_switch_count, get_contexts_count, irq_counts}, + percpu::get_all_stats, + syscall::error::Result, + time::START, +}; +use alloc::{string::String, vec::Vec}; + +/// Get the sys:stat data as displayed to the user. +pub fn resource() -> Result> { + let start_time_sec = *START.lock() / 1_000_000_000; + + let (contexts_running, contexts_blocked) = get_contexts_stats(); + let res = format!( + "{}{}\n\ + boot_time: {start_time_sec}\n\ + context_switches: {}\n\ + contexts_created: {}\n\ + contexts_running: {contexts_running}\n\ + contexts_blocked: {contexts_blocked}", + get_cpu_stats(), + get_irq_stats(), + get_context_switch_count(), + get_contexts_count(), + ); + + Ok(res.into_bytes()) +} + +/// Formats CPU stats. 
+fn get_cpu_stats() -> String { + let mut cpu_data = String::new(); + let stats = get_all_stats(); + + let mut total_user = 0; + let mut total_nice = 0; + let mut total_kernel = 0; + let mut total_idle = 0; + let mut total_irq = 0; + for (id, stat) in stats { + total_user += stat.user; + total_nice += stat.nice; + total_kernel += stat.kernel; + total_idle += stat.idle; + total_irq += stat.irq; + cpu_data += &format!("{}\n", stat.to_string(id)); + } + format!( + "cpu {total_user} {total_nice} {total_kernel} {total_idle} {total_irq}\n\ + {cpu_data}" + ) +} + +/// Formats IRQ stats. +fn get_irq_stats() -> String { + let irq = irq_counts(); + let mut irq_total = 0; + let per_irq = irq + .iter() + .map(|c| { + irq_total += *c; + format!("{c}") + }) + .collect::>() + .join(" "); + format!("IRQs {irq_total} {per_irq}") +} + +/// Format contexts stats. +fn get_contexts_stats() -> (u64, u64) { + let mut running = 0; + let mut blocked = 0; + + let statuses = contexts() + .iter() + .filter_map(ContextRef::upgrade) + .map(|context| context.read_arc().status.clone()) + .collect::>(); + + for status in statuses { + if matches!(status, Status::Runnable) { + running += 1; + } else if !matches!(status, Status::Dead) { + blocked += 1; + } + } + (running, blocked) +} diff --git a/src/scheme/sys/syscall.rs b/src/scheme/sys/syscall.rs index 4d2eea4f..6c5f9ae0 100644 --- a/src/scheme/sys/syscall.rs +++ b/src/scheme/sys/syscall.rs @@ -1,11 +1,7 @@ -use alloc::string::String; -use alloc::vec::Vec; +use alloc::{string::String, vec::Vec}; use core::fmt::Write; -use core::str; -use crate::context; -use crate::syscall; -use crate::syscall::error::Result; +use crate::{context, syscall, syscall::error::Result}; pub fn resource() -> Result> { let mut string = String::new(); @@ -14,20 +10,25 @@ pub fn resource() -> Result> { let mut rows = Vec::new(); { let contexts = context::contexts(); - for (id, context_lock) in contexts.iter() { - let context = context_lock.read(); - rows.push((*id, context.name.lock().clone(), context.syscall.clone())); + for context_ref in contexts.iter().filter_map(|r| r.upgrade()) { + let context = context_ref.read(); + rows.push(( + context.debug_id, + context.name.clone(), + context.current_syscall(), + )); } } - for row in rows.iter() { - let id: usize = row.0.into(); - let name = str::from_utf8(&row.1).unwrap_or("."); - + for &(id, ref name, sc) in rows.iter() { let _ = writeln!(string, "{}: {}", id, name); - if let Some((a, b, c, d, e, f)) = row.2 { - let _ = writeln!(string, " {}", syscall::debug::format_call(a, b, c, d, e, f)); + if let Some([a, b, c, d, e, f]) = sc { + let _ = writeln!( + string, + " {}", + syscall::debug::format_call(a, b, c, d, e, f) + ); } } } diff --git a/src/scheme/sys/uname.rs b/src/scheme/sys/uname.rs index b8254615..c4136a0d 100644 --- a/src/scheme/sys/uname.rs +++ b/src/scheme/sys/uname.rs @@ -1,9 +1,11 @@ -use alloc::vec::Vec; use crate::syscall::error::Result; +use alloc::vec::Vec; pub fn resource() -> Result> { - Ok(format!("Redox\n\n{}\n\n{}\n", - env!("CARGO_PKG_VERSION"), - env!("TARGET").split('-').next().unwrap()).into_bytes()) + Ok(format!( + "Redox\n\n{}\n\n{}\n", + env!("CARGO_PKG_VERSION"), + env!("TARGET").split('-').next().unwrap() + ) + .into_bytes()) } - diff --git a/src/scheme/time.rs b/src/scheme/time.rs index 8b340fb4..55ac9b3c 100644 --- a/src/scheme/time.rs +++ b/src/scheme/time.rs @@ -1,121 +1,117 @@ use alloc::collections::BTreeMap; -use core::{mem, slice, str}; -use core::sync::atomic::{AtomicUsize, Ordering}; +use core::{ + mem, str, + 
sync::atomic::{AtomicUsize, Ordering}, +}; use spin::RwLock; -use crate::context::timeout; -use crate::scheme::SchemeId; -use crate::syscall::data::TimeSpec; -use crate::syscall::error::*; -use crate::syscall::flag::{CLOCK_REALTIME, CLOCK_MONOTONIC}; -use crate::syscall::scheme::Scheme; -use crate::time; - -pub struct TimeScheme { - scheme_id: SchemeId, - next_id: AtomicUsize, - handles: RwLock> -} +use crate::{ + context::{file::InternalFlags, timeout}, + syscall::{ + data::TimeSpec, + error::*, + flag::{EventFlags, CLOCK_MONOTONIC, CLOCK_REALTIME}, + usercopy::{UserSliceRo, UserSliceWo}, + }, + time, +}; -impl TimeScheme { - pub fn new(scheme_id: SchemeId) -> TimeScheme { - TimeScheme { - scheme_id: scheme_id, - next_id: AtomicUsize::new(0), - handles: RwLock::new(BTreeMap::new()) - } - } -} +use super::{CallerCtx, GlobalSchemes, KernelScheme, OpenResult}; -impl Scheme for TimeScheme { - fn open(&self, path: &[u8], _flags: usize, _uid: u32, _gid: u32) -> Result { - let path_str = str::from_utf8(path).or(Err(Error::new(ENOENT)))?; +static NEXT_ID: AtomicUsize = AtomicUsize::new(1); +// Using BTreeMap as hashbrown doesn't have a const constructor. +static HANDLES: RwLock> = RwLock::new(BTreeMap::new()); - let clock = path_str.parse::().or(Err(Error::new(ENOENT)))?; +pub struct TimeScheme; + +impl KernelScheme for TimeScheme { + fn kopen(&self, path: &str, _flags: usize, _ctx: CallerCtx) -> Result { + let clock = path.parse::().map_err(|_| Error::new(ENOENT))?; match clock { CLOCK_REALTIME => (), CLOCK_MONOTONIC => (), - _ => return Err(Error::new(ENOENT)) + _ => return Err(Error::new(ENOENT)), } - let id = self.next_id.fetch_add(1, Ordering::SeqCst); - self.handles.write().insert(id, clock); + let id = NEXT_ID.fetch_add(1, Ordering::Relaxed); + HANDLES.write().insert(id, clock); - Ok(id) + Ok(OpenResult::SchemeLocal(id, InternalFlags::empty())) } - fn read(&self, id: usize, buf: &mut [u8]) -> Result { - let clock = { - let handles = self.handles.read(); - *handles.get(&id).ok_or(Error::new(EBADF))? - }; + fn fcntl(&self, _id: usize, _cmd: usize, _arg: usize) -> Result { + Ok(0) + } - let time_buf = unsafe { slice::from_raw_parts_mut(buf.as_mut_ptr() as *mut TimeSpec, buf.len()/mem::size_of::()) }; + fn fevent(&self, id: usize, _flags: EventFlags) -> Result { + HANDLES + .read() + .get(&id) + .ok_or(Error::new(EBADF)) + .and(Ok(EventFlags::empty())) + } - let mut i = 0; - while i < time_buf.len() { - let arch_time = match clock { - CLOCK_REALTIME => time::realtime(), - CLOCK_MONOTONIC => time::monotonic(), - _ => return Err(Error::new(EINVAL)) - }; - time_buf[i].tv_sec = arch_time.0 as i64; - time_buf[i].tv_nsec = arch_time.1 as i32; - i += 1; - } + fn fsync(&self, id: usize) -> Result<()> { + HANDLES.read().get(&id).ok_or(Error::new(EBADF))?; + Ok(()) + } - Ok(i * mem::size_of::()) + fn close(&self, id: usize) -> Result<()> { + HANDLES + .write() + .remove(&id) + .ok_or(Error::new(EBADF)) + .and(Ok(())) } + fn kread(&self, id: usize, buf: UserSliceWo, _flags: u32, _stored_flags: u32) -> Result { + let clock = *HANDLES.read().get(&id).ok_or(Error::new(EBADF))?; - fn write(&self, id: usize, buf: &[u8]) -> Result { - let clock = { - let handles = self.handles.read(); - *handles.get(&id).ok_or(Error::new(EBADF))? 
- }; + let mut bytes_read = 0; - let time_buf = unsafe { slice::from_raw_parts(buf.as_ptr() as *const TimeSpec, buf.len()/mem::size_of::()) }; + for current_chunk in buf.in_exact_chunks(mem::size_of::()) { + let arch_time = match clock { + CLOCK_REALTIME => time::realtime(), + CLOCK_MONOTONIC => time::monotonic(), + _ => return Err(Error::new(EINVAL)), + }; + let time = TimeSpec { + tv_sec: (arch_time / time::NANOS_PER_SEC) as i64, + tv_nsec: (arch_time % time::NANOS_PER_SEC) as i32, + }; + current_chunk.copy_exactly(&time)?; - let mut i = 0; - while i < time_buf.len() { - let time = time_buf[i]; - timeout::register(self.scheme_id, id, clock, time); - i += 1; + bytes_read += mem::size_of::(); } - Ok(i * mem::size_of::()) + Ok(bytes_read) } - fn fcntl(&self, _id: usize, _cmd: usize, _arg: usize) -> Result { - Ok(0) - } + fn kwrite( + &self, + id: usize, + buf: UserSliceRo, + _flags: u32, + _stored_flags: u32, + ) -> Result { + let clock = *HANDLES.read().get(&id).ok_or(Error::new(EBADF))?; - fn fevent(&self, id: usize, _flags: usize) -> Result { - let handles = self.handles.read(); - handles.get(&id).ok_or(Error::new(EBADF)).and(Ok(0)) - } + let mut bytes_written = 0; - fn fpath(&self, id: usize, buf: &mut [u8]) -> Result { - let clock = { - let handles = self.handles.read(); - *handles.get(&id).ok_or(Error::new(EBADF))? - }; + for current_chunk in buf.in_exact_chunks(mem::size_of::()) { + let time = unsafe { current_chunk.read_exact::()? }; - let mut i = 0; - let scheme_path = format!("time:{}", clock).into_bytes(); - while i < buf.len() && i < scheme_path.len() { - buf[i] = scheme_path[i]; - i += 1; + timeout::register(GlobalSchemes::Time.scheme_id(), id, clock, time); + + bytes_written += mem::size_of::(); } - Ok(i) - } - fn fsync(&self, id: usize) -> Result { - let handles = self.handles.read(); - handles.get(&id).ok_or(Error::new(EBADF)).and(Ok(0)) + Ok(bytes_written) } + fn kfpath(&self, id: usize, buf: UserSliceWo) -> Result { + let clock = *HANDLES.read().get(&id).ok_or(Error::new(EBADF))?; - fn close(&self, id: usize) -> Result { - self.handles.write().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) + let scheme_path = format!("time:{}", clock).into_bytes(); + buf.copy_common_bytes_from_slice(&scheme_path) } } diff --git a/src/scheme/user.rs b/src/scheme/user.rs index 11a86200..fd168bb3 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -1,59 +1,223 @@ -use alloc::sync::{Arc, Weak}; -use alloc::boxed::Box; -use alloc::collections::BTreeMap; -use core::sync::atomic::{AtomicBool, AtomicU64, Ordering}; -use core::{mem, slice, usize}; +use alloc::{ + boxed::Box, + sync::{Arc, Weak}, + vec::Vec, +}; +use core::{ + mem, + mem::size_of, + num::NonZeroUsize, + sync::atomic::{AtomicBool, Ordering}, + usize, +}; +use slab::Slab; use spin::{Mutex, RwLock}; - -use crate::context::{self, Context}; -use crate::context::file::FileDescriptor; -use crate::context::memory::Grant; -use crate::event; -use crate::paging::{InactivePageTable, Page, VirtualAddress}; -use crate::paging::entry::EntryFlags; -use crate::paging::temporary_page::TemporaryPage; -use crate::scheme::{AtomicSchemeId, ATOMIC_SCHEMEID_INIT, SchemeId}; -use crate::sync::{WaitQueue, WaitMap}; -use crate::syscall::data::{Map, Packet, Stat, StatVfs, TimeSpec}; -use crate::syscall::error::*; -use crate::syscall::flag::{EVENT_READ, O_NONBLOCK, PROT_EXEC, PROT_READ, PROT_WRITE}; -use crate::syscall::number::*; -use crate::syscall::scheme::Scheme; +use spinning_top::RwSpinlock; +use syscall::{ + schemev2::{Cqe, CqeOpcode, Opcode, 
Sqe, SqeFlags}, + CallFlags, FobtainFdFlags, MunmapFlags, SendFdFlags, F_SETFL, KSMSG_CANCEL, + MAP_FIXED_NOREPLACE, SKMSG_FOBTAINFD, SKMSG_FRETURNFD, SKMSG_PROVIDE_MMAP, +}; + +use crate::{ + context::{ + self, + context::HardBlockedReason, + file::{FileDescription, FileDescriptor, InternalFlags}, + memory::{ + AddrSpace, AddrSpaceWrapper, BorrowedFmapSource, Grant, GrantFileRef, MmapMode, + PageSpan, DANGLING, + }, + BorrowedHtBuf, Context, Status, + }, + event, + memory::Frame, + paging::{Page, VirtualAddress, PAGE_SIZE}, + scheme::SchemeId, + sync::WaitQueue, + syscall::{ + data::{Map, Packet}, + error::*, + flag::{EventFlags, MapFlags, EVENT_READ, O_NONBLOCK, PROT_READ}, + number::*, + usercopy::{UserSlice, UserSliceRo, UserSliceRw, UserSliceWo}, + }, +}; + +use super::{CallerCtx, FileHandle, KernelScheme, OpenResult}; pub struct UserInner { root_id: SchemeId, handle_id: usize, - pub name: Box<[u8]>, - pub flags: usize, - pub scheme_id: AtomicSchemeId, - next_id: AtomicU64, - context: Weak>, - todo: WaitQueue, - fmap: Mutex>, FileDescriptor, Map)>>, - funmap: Mutex>, - done: WaitMap, + pub name: Box, + pub scheme_id: SchemeId, + v2: bool, + supports_on_close: bool, + context: Weak>, + todo: WaitQueue, + + // TODO: custom packed radix tree data structure + states: Mutex>, + unmounting: AtomicBool, } +enum State { + Waiting { + context: Weak>, + fd: Option>>, + callee_responsible: PageSpan, + canceling: bool, + }, + Responded(Response), + Fmap(Weak>), + Placeholder, +} + +#[derive(Debug)] +pub enum Response { + Regular(usize, u8), + Fd(Arc>), +} + +const ONE: NonZeroUsize = match NonZeroUsize::new(1) { + Some(one) => one, + None => unreachable!(), +}; + +enum ParsedCqe { + TriggerFevent { + number: usize, + flags: EventFlags, + }, + RegularResponse { + tag: u32, + code: usize, + extra0: u8, + }, + ResponseWithFd { + tag: u32, + fd: usize, + }, + ObtainFd { + tag: u32, + flags: FobtainFdFlags, + dst_fd_or_ptr: usize, + }, + ProvideMmap { + tag: u32, + offset: u64, + base_addr: VirtualAddress, + page_count: usize, + }, + NoOp, // TODO: remove +} +impl ParsedCqe { + fn parse_packet(packet: &Packet) -> Result { + Ok(if packet.id == 0 { + match packet.a { + SYS_FEVENT => Self::TriggerFevent { + number: packet.b, + flags: EventFlags::from_bits_truncate(packet.c), + }, + _ => { + log::warn!( + "Unknown scheme -> kernel message {} from {}", + packet.a, + context::current().read().name + ); + + // Some schemes don't implement cancellation properly yet, so we temporarily + // ignore their responses to the cancellation message, rather than EINVAL. + if packet.a == Error::mux(Err(Error::new(ENOSYS))) { + return Ok(Self::NoOp); + } + + return Err(Error::new(EINVAL)); + } + } + } else if Error::demux(packet.a) == Err(Error::new(ESKMSG)) { + // The reason why the new ESKMSG mechanism was introduced, is that passing packet IDs + // in packet.id is much cleaner than having to convert it into 1 or 2 usizes etc. 
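+            // Note: packet.b selects which SKMSG submessage this is; the original request
+            // tag is recovered from packet.id. Tags are offset by one so that id 0 stays
+            // reserved for scheme-to-kernel messages (see the branch above).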
+ match packet.b { + SKMSG_FRETURNFD => Self::ResponseWithFd { + tag: (packet.id - 1) as u32, + fd: packet.d, + }, + SKMSG_FOBTAINFD => Self::ObtainFd { + tag: (packet.id - 1) as u32, + flags: FobtainFdFlags::from_bits(packet.d).ok_or(Error::new(EINVAL))?, + dst_fd_or_ptr: packet.c, + }, + SKMSG_PROVIDE_MMAP => Self::ProvideMmap { + tag: (packet.id - 1) as u32, + offset: u64::from(packet.uid) | (u64::from(packet.gid) << 32), + base_addr: VirtualAddress::new(packet.c), + page_count: packet.d, + }, + _ => return Err(Error::new(EINVAL)), + } + } else { + ParsedCqe::RegularResponse { + tag: (packet.id - 1) as u32, + code: packet.a, + extra0: 0, + } + }) + } + fn parse_cqe(cqe: &Cqe) -> Result { + Ok( + match CqeOpcode::try_from_raw(cqe.flags & 0b11).ok_or(Error::new(EINVAL))? { + CqeOpcode::RespondRegular => Self::RegularResponse { + tag: cqe.tag, + code: cqe.result as usize, + extra0: cqe.extra_raw[0], + }, + CqeOpcode::RespondWithFd => Self::ResponseWithFd { + tag: cqe.tag, + fd: cqe.result as usize, + }, + CqeOpcode::SendFevent => Self::TriggerFevent { + number: cqe.result as usize, + flags: EventFlags::from_bits(cqe.tag as usize).ok_or(Error::new(EINVAL))?, + }, + CqeOpcode::ObtainFd => Self::ObtainFd { + tag: cqe.tag, + flags: FobtainFdFlags::from_bits(cqe.extra() as usize) + .ok_or(Error::new(EINVAL))?, + dst_fd_or_ptr: cqe.result as usize, + }, + }, + ) + } +} + impl UserInner { - pub fn new(root_id: SchemeId, handle_id: usize, name: Box<[u8]>, flags: usize, context: Weak>) -> UserInner { + pub fn new( + root_id: SchemeId, + scheme_id: SchemeId, + v2: bool, + new_close: bool, + handle_id: usize, + name: Box, + _flags: usize, + context: Weak>, + ) -> UserInner { UserInner { - root_id: root_id, - handle_id: handle_id, - name: name, - flags: flags, - scheme_id: ATOMIC_SCHEMEID_INIT, - next_id: AtomicU64::new(1), - context: context, + root_id, + handle_id, + name, + v2, + supports_on_close: new_close, + scheme_id, + context, todo: WaitQueue::new(), - fmap: Mutex::new(BTreeMap::new()), - funmap: Mutex::new(BTreeMap::new()), - done: WaitMap::new(), unmounting: AtomicBool::new(false), + states: Mutex::new(Slab::with_capacity(32)), } } - pub fn unmount(&self) -> Result { + pub fn unmount(&self) -> Result<()> { // First, block new requests and prepare to return EOF self.unmounting.store(true, Ordering::SeqCst); @@ -64,417 +228,1582 @@ impl UserInner { event::trigger(self.root_id, self.handle_id, EVENT_READ); //TODO: wait for all todo and done to be processed? - Ok(0) + Ok(()) } - pub fn call(&self, a: usize, b: usize, c: usize, d: usize) -> Result { - let (pid, uid, gid) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - (context.id, context.euid, context.egid) - }; + fn next_id(&self) -> Result { + let mut states = self.states.lock(); + let idx = states.insert(State::Placeholder); - self.call_inner(Packet { - id: self.next_id.fetch_add(1, Ordering::SeqCst), - pid: pid.into(), - uid: uid, - gid: gid, - a: a, - b: b, - c: c, - d: d - }) + // TODO: implement blocking? + u32::try_from(idx).map_err(|_| Error::new(EAGAIN)) + } + + pub fn call( + &self, + opcode: Opcode, + args: impl Args, + caller_responsible: &mut PageSpan, + ) -> Result { + let ctx = context::current().read().caller_ctx(); + match self.call_extended(ctx, None, opcode, args, caller_responsible)? 
{ + Response::Regular(code, _) => Error::demux(code), + Response::Fd(_) => Err(Error::new(EIO)), + } + } + + pub fn call_extended( + &self, + ctx: CallerCtx, + fd: Option>>, + opcode: Opcode, + args: impl Args, + caller_responsible: &mut PageSpan, + ) -> Result { + self.call_extended_inner( + fd, + Sqe { + opcode: opcode as u8, + sqe_flags: SqeFlags::empty(), + _rsvd: 0, + tag: self.next_id()?, + caller: ctx.pid as u64, + args: { + let mut a = args.args(); + a[5] = uid_gid_hack_merge([ctx.uid, ctx.gid]); + a + }, + }, + caller_responsible, + ) } - fn call_inner(&self, packet: Packet) -> Result { + fn call_extended_inner( + &self, + fd: Option>>, + sqe: Sqe, + caller_responsible: &mut PageSpan, + ) -> Result { if self.unmounting.load(Ordering::SeqCst) { return Err(Error::new(ENODEV)); } - let id = packet.id; + let current_context = context::current(); + + { + let mut states = self.states.lock(); + current_context.write().block("UserScheme::call"); + states[sqe.tag as usize] = State::Waiting { + context: Arc::downgrade(¤t_context), + fd, + canceling: false, + + // This is the part that the scheme handler will deallocate when responding. It + // starts as empty, so the caller can unmap it (optimal for TLB), but is populated + // the caller is interrupted by SIGKILL. + callee_responsible: PageSpan::empty(), + }; + self.todo.send(sqe); + } - self.todo.send(packet); event::trigger(self.root_id, self.handle_id, EVENT_READ); - Error::demux(self.done.receive(&id)) - } + loop { + context::switch(); + + let mut states = self.states.lock(); - pub fn capture(&self, buf: &[u8]) -> Result { - UserInner::capture_inner(&self.context, buf.as_ptr() as usize, buf.len(), PROT_READ, None) + let mut eintr_if_sigkill = |callee_responsible: &mut PageSpan| { + // If SIGKILL was found without waiting for scheme, EINTR directly. In that + // case, data loss doesn't matter. + if context::current().read().being_sigkilled { + // Callee must deallocate memory, rather than the caller. This is less optimal + // for TLB, but we don't really have any other choice. The scheme must be able + // to access the borrowed memory until it has responded to the request. + *callee_responsible = core::mem::replace(caller_responsible, PageSpan::empty()); + + Err(Error::new(EINTR)) + } else { + Ok(()) + } + }; + + match states.get_mut(sqe.tag as usize) { + // invalid state + None => return Err(Error::new(EBADFD)), + Some(o) => match mem::replace(o, State::Placeholder) { + // signal wakeup while awaiting cancelation + State::Waiting { + canceling: true, + mut callee_responsible, + context, + fd, + } => { + let maybe_eintr = eintr_if_sigkill(&mut callee_responsible); + *o = State::Waiting { + canceling: true, + callee_responsible, + context, + fd, + }; + drop(states); + maybe_eintr?; + + context::current().write().block("UserInner::call"); + } + // spurious wakeup + State::Waiting { + canceling: false, + fd, + context, + mut callee_responsible, + } => { + let maybe_eintr = eintr_if_sigkill(&mut callee_responsible); + *o = State::Waiting { + canceling: true, + fd, + context, + callee_responsible, + }; + + drop(states); + maybe_eintr?; + + // TODO: Is this too dangerous when the states lock is held? 
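+                        // The caller was woken before the scheme responded: mark the request
+                        // as canceling and queue a one-way Cancel SQE so the scheme handler
+                        // can abort (or finish) the in-flight operation.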
+ self.todo.send(Sqe { + opcode: Opcode::Cancel as u8, + sqe_flags: SqeFlags::ONEWAY, + tag: sqe.tag, + ..Default::default() + }); + event::trigger(self.root_id, self.handle_id, EVENT_READ); + context::current().write().block("UserInner::call"); + } + + // invalid state + old_state @ (State::Placeholder | State::Fmap(_)) => { + *o = old_state; + return Err(Error::new(EBADFD)); + } + + State::Responded(response) => { + states.remove(sqe.tag as usize); + return Ok(response); + } + }, + } + } } - pub fn capture_mut(&self, buf: &mut [u8]) -> Result { - UserInner::capture_inner(&self.context, buf.as_mut_ptr() as usize, buf.len(), PROT_WRITE, None) + /// Map a readable structure to the scheme's userspace and return the + /// pointer + #[must_use = "copying back to head/tail buffers can fail"] + pub fn capture_user( + &self, + buf: UserSlice, + ) -> Result> { + UserInner::capture_inner(&self.context, buf) + } + pub fn copy_and_capture_tail(&self, buf: &[u8]) -> Result> { + let dst_addr_space = Arc::clone( + self.context + .upgrade() + .ok_or(Error::new(ENODEV))? + .read() + .addr_space()?, + ); + + let mut tail = BorrowedHtBuf::tail()?; + let tail_frame = tail.frame(); + if buf.len() > tail.buf().len() { + return Err(Error::new(EINVAL)); + } + tail.buf_mut()[..buf.len()].copy_from_slice(buf); + + let is_pinned = true; + let dst_page = dst_addr_space.acquire_write().mmap_anywhere( + &dst_addr_space, + ONE, + PROT_READ, + |dst_page, flags, mapper, flusher| { + Ok(Grant::allocated_shared_one_page( + tail_frame, dst_page, flags, mapper, flusher, is_pinned, + )?) + }, + )?; + + let base = dst_page.start_address().data(); + let len = buf.len(); + + Ok(CaptureGuard { + base, + len, + destroyed: false, + head: CopyInfo { + src: Some(tail), + dst: None, + }, + tail: CopyInfo { + src: None, + dst: None, + }, + span: { + let (first_page, page_count, _offset) = page_range_containing(base, len); + PageSpan::new(first_page, page_count) + }, + addrsp: Some(dst_addr_space), + }) } - fn capture_inner(context_weak: &Weak>, address: usize, size: usize, flags: usize, desc_opt: Option) -> Result { - //TODO: Abstract with other grant creation - if size == 0 { - Ok(0) - } else { - let context_lock = context_weak.upgrade().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); + // TODO: Use an address space Arc over a context Arc. While contexts which share address spaces + // still can access borrowed scheme pages, it would both be cleaner and would handle the case + // where the initial context is closed. + /// Capture a buffer owned by userspace, mapping it contiguously onto scheme memory. + // TODO: Hypothetical accept_head_leak, accept_tail_leak options might be useful for + // libc-controlled buffer pools. + fn capture_inner( + context_weak: &Weak>, + user_buf: UserSlice, + ) -> Result> { + let mut map_flags = MapFlags::empty(); + map_flags.set(MapFlags::PROT_READ, READ); + map_flags.set(MapFlags::PROT_WRITE, WRITE); + + if user_buf.is_empty() { + // NOTE: Rather than returning NULL, we return a dummy dangling address, that is + // happens to be non-canonical on x86. This relieves scheme handlers from having to + // check the length before e.g. creating nonnull Rust references (when an empty length + // still requires a nonnull but possibly dangling pointer, and this has in practice + // made nulld errorneously confuse an empty Some("") with None (invalid UTF-8), due to + // enum layout optimization, as the pointer was null and not dangling). 
A good choice + // is thus to simply set the most-significant bit to be compatible with all alignments. + return Ok(CaptureGuard { + destroyed: false, + base: DANGLING, + len: 0, + head: CopyInfo { + src: None, + dst: None, + }, + tail: CopyInfo { + src: None, + dst: None, + }, + span: PageSpan::empty(), + addrsp: None, + }); + } - let mut grants = context.grants.lock(); + let cur_space_lock = AddrSpace::current()?; + let dst_space_lock = Arc::clone( + context_weak + .upgrade() + .ok_or(Error::new(ESRCH))? + .read() + .addr_space()?, + ); + + if Arc::ptr_eq(&dst_space_lock, &cur_space_lock) { + // Same address space, no need to remap anything! + return Ok(CaptureGuard { + destroyed: false, + base: user_buf.addr(), + len: user_buf.len(), + head: CopyInfo { + src: None, + dst: None, + }, + tail: CopyInfo { + src: None, + dst: None, + }, + span: PageSpan::empty(), + addrsp: Some(dst_space_lock), + }); + } - let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_table()) }; - let mut temporary_page = TemporaryPage::new(Page::containing_address(VirtualAddress::new(crate::USER_TMP_GRANT_OFFSET))); + let (src_page, page_count, offset) = page_range_containing(user_buf.addr(), user_buf.len()); - let from_address = (address/4096) * 4096; - let offset = address - from_address; - let full_size = ((offset + size + 4095)/4096) * 4096; - let mut to_address = crate::USER_GRANT_OFFSET; + let align_offset = if offset == 0 { 0 } else { PAGE_SIZE - offset }; + let (head_part_of_buf, middle_tail_part_of_buf) = user_buf + .split_at(core::cmp::min(align_offset, user_buf.len())) + .expect("split must succeed"); - let mut entry_flags = EntryFlags::PRESENT | EntryFlags::USER_ACCESSIBLE; - if flags & PROT_EXEC == 0 { - entry_flags |= EntryFlags::NO_EXECUTE; - } - if flags & PROT_READ > 0 { - //TODO: PROT_READ - } - if flags & PROT_WRITE > 0 { - entry_flags |= EntryFlags::WRITABLE; - } + let mut dst_space = dst_space_lock.acquire_write(); - let mut i = 0; - while i < grants.len() { - let start = grants[i].start_address().get(); - if to_address + full_size < start { - break; - } + let free_span = dst_space + .grants + .find_free(dst_space.mmap_min, page_count) + .ok_or(Error::new(ENOMEM))?; + + let head = if !head_part_of_buf.is_empty() { + // FIXME: Signal context can probably recursively use head/tail. + let mut array = BorrowedHtBuf::head()?; + let frame = array.frame(); + + let len = core::cmp::min(PAGE_SIZE - offset, user_buf.len()); + + if READ { + array.buf_mut()[..offset].fill(0_u8); + array.buf_mut()[offset + len..].fill(0_u8); + + let slice = &mut array.buf_mut()[offset..][..len]; + let head_part_of_buf = user_buf.limit(len).expect("always smaller than max len"); - let pages = (grants[i].size() + 4095) / 4096; - let end = start + pages * 4096; - to_address = end; - i += 1; + head_part_of_buf + .reinterpret_unchecked::() + .copy_to_slice(slice)?; + } else { + array.buf_mut().fill(0_u8); } - //TODO: Use syscall_head and syscall_tail to avoid leaking data - grants.insert(i, Grant::map_inactive( - VirtualAddress::new(from_address), - VirtualAddress::new(to_address), - full_size, - entry_flags, - desc_opt, - &mut new_table, - &mut temporary_page - )); + dst_space.mmap( + &dst_space_lock, + Some(free_span.base), + ONE, + map_flags | MAP_FIXED_NOREPLACE, + &mut Vec::new(), + move |dst_page, page_flags, mapper, flusher| { + let is_pinned = true; + Ok(Grant::allocated_shared_one_page( + frame, dst_page, page_flags, mapper, flusher, is_pinned, + )?) 
+ }, + )?; + + let head = CopyInfo { + src: Some(array), + dst: WRITE.then_some(head_part_of_buf.reinterpret_unchecked()), + }; + + head + } else { + CopyInfo { + src: None, + dst: None, + } + }; + let (first_middle_dst_page, first_middle_src_page) = if !head_part_of_buf.is_empty() { + (free_span.base.next(), src_page.next()) + } else { + (free_span.base, src_page) + }; - Ok(to_address + offset) + let middle_page_count = middle_tail_part_of_buf.len() / PAGE_SIZE; + let tail_size = middle_tail_part_of_buf.len() % PAGE_SIZE; + + let (_middle_part_of_buf, tail_part_of_buf) = middle_tail_part_of_buf + .split_at(middle_page_count * PAGE_SIZE) + .expect("split must succeed"); + + if let Some(middle_page_count) = NonZeroUsize::new(middle_page_count) { + dst_space.mmap( + &dst_space_lock, + Some(first_middle_dst_page), + middle_page_count, + map_flags | MAP_FIXED_NOREPLACE, + &mut Vec::new(), + move |dst_page, _, mapper, flusher| { + let eager = true; + + // It doesn't make sense to allow a context, that has borrowed non-RAM physical + // memory, to DIRECTLY do scheme calls onto that memory. + // + // (TODO: Maybe there are some niche use cases for that, possibly PCI transfer + // BARs, but it doesn't make sense yet.) + let allow_phys = false; + + // Deny any attempts by the scheme, to unmap these temporary pages. The only way to + // unmap them is to respond to the scheme socket. + let is_pinned_userscheme_borrow = true; + + Ok(Grant::borrow( + Arc::clone(&cur_space_lock), + &mut *cur_space_lock.acquire_write(), + first_middle_src_page, + dst_page, + middle_page_count.get(), + map_flags, + mapper, + flusher, + eager, + allow_phys, + is_pinned_userscheme_borrow, + )?) + }, + )?; } - } - pub fn release(&self, address: usize) -> Result<()> { - if address == 0 { - Ok(()) - } else { - let context_lock = self.context.upgrade().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); + let tail = if !tail_part_of_buf.is_empty() { + let tail_dst_page = first_middle_dst_page.next_by(middle_page_count); - let mut grants = context.grants.lock(); + // FIXME: Signal context can probably recursively use head/tail. + let mut array = BorrowedHtBuf::tail()?; + let frame = array.frame(); - let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_table()) }; - let mut temporary_page = TemporaryPage::new(Page::containing_address(VirtualAddress::new(crate::USER_TMP_GRANT_OFFSET))); + if READ { + let (to_copy, to_zero) = array.buf_mut().split_at_mut(tail_size); - for i in 0 .. grants.len() { - let start = grants[i].start_address().get(); - let end = start + grants[i].size(); - if address >= start && address < end { - grants.remove(i).unmap_inactive(&mut new_table, &mut temporary_page); + to_zero.fill(0_u8); - return Ok(()); - } + // FIXME: remove reinterpret_unchecked + tail_part_of_buf + .reinterpret_unchecked::() + .copy_to_slice(to_copy)?; + } else { + array.buf_mut().fill(0_u8); } - Err(Error::new(EFAULT)) - } - } - - pub fn read(&self, buf: &mut [u8]) -> Result { - let packet_buf = unsafe { slice::from_raw_parts_mut( - buf.as_mut_ptr() as *mut Packet, - buf.len()/mem::size_of::()) + dst_space.mmap( + &dst_space_lock, + Some(tail_dst_page), + ONE, + map_flags | MAP_FIXED_NOREPLACE, + &mut Vec::new(), + move |dst_page, page_flags, mapper, flusher| { + let is_pinned = true; + Ok(Grant::allocated_shared_one_page( + frame, dst_page, page_flags, mapper, flusher, is_pinned, + )?) 
+ }, + )?; + + CopyInfo { + src: Some(array), + dst: WRITE.then_some(tail_part_of_buf.reinterpret_unchecked()), + } + } else { + CopyInfo { + src: None, + dst: None, + } }; + drop(dst_space); + + let base = free_span.base.start_address().data() + offset; + Ok(CaptureGuard { + destroyed: false, + base, + len: user_buf.len(), + head, + tail, + span: { + let (first_page, page_count, _offset) = page_range_containing(base, user_buf.len()); + PageSpan::new(first_page, page_count) + }, + addrsp: Some(dst_space_lock), + }) + } + + pub fn read(&self, buf: UserSliceWo, flags: u32) -> Result { // If O_NONBLOCK is used, do not block - let nonblock = self.flags & O_NONBLOCK == O_NONBLOCK; + let nonblock = flags & O_NONBLOCK as u32 != 0; + // If unmounting, do not block so that EOF can be returned immediately - let unmounting = self.unmounting.load(Ordering::SeqCst); - let block = !(nonblock || unmounting); - if let Some(count) = self.todo.receive_into(packet_buf, block) { - if count > 0 { + let block = !(nonblock || self.unmounting.load(Ordering::SeqCst)); + + if self.v2 { + return match self + .todo + .receive_into_user(buf, block, "UserInner::read (v2)") + { // If we received requests, return them to the scheme handler - Ok(count * mem::size_of::()) - } else if unmounting { + Ok(byte_count) => Ok(byte_count), // If there were no requests and we were unmounting, return EOF - Ok(0) - } else { - // If there were no requests and O_NONBLOCK was used, return EAGAIN - Err(Error::new(EAGAIN)) + Err(Error { errno: EAGAIN }) if self.unmounting.load(Ordering::SeqCst) => Ok(0), + // If there were no requests and O_NONBLOCK was used (EAGAIN), or some other error + // occurred, return that. + Err(error) => Err(error), + }; + } else { + let mut bytes_read = 0; + + for dst in buf.in_exact_chunks(size_of::()) { + match self + .todo + .receive(block && bytes_read == 0, "UserInner::read (legacy)") + { + Ok(sqe) => { + dst.copy_exactly(&self.translate_sqe_to_packet(&sqe)?)?; + bytes_read += size_of::(); + } + Err(_) if bytes_read > 0 => return Ok(bytes_read), + Err(Error { errno: EAGAIN }) if self.unmounting.load(Ordering::SeqCst) => { + return Ok(bytes_read) + } + Err(error) => return Err(error), + } + } + Ok(bytes_read) + } + } + fn translate_sqe_to_packet(&self, sqe: &Sqe) -> Result { + let opc = Opcode::try_from_raw(sqe.opcode) + .expect("passed scheme opcode not internally recognized by kernel"); + + let uid = sqe.args[5] as u32; + let gid = (sqe.args[5] >> 32) as u32; + + Ok(Packet { + id: u64::from(sqe.tag) + 1, + pid: sqe.caller as usize, + a: match opc { + Opcode::Open => SYS_OPEN, + Opcode::Rmdir => SYS_RMDIR, + Opcode::Unlink => SYS_UNLINK, + Opcode::Close => SYS_CLOSE, + Opcode::Dup => SYS_DUP, + Opcode::Read => SYS_READ, + Opcode::Write => SYS_WRITE, + Opcode::Fsize => SYS_LSEEK, // lseek reuses the fsize "opcode", must be !v2 + Opcode::Fchmod => SYS_FCHMOD, + Opcode::Fchown => SYS_FCHOWN, + Opcode::Fcntl => SYS_FCNTL, + Opcode::Fevent => SYS_FEVENT, + Opcode::Sendfd => SYS_SENDFD, + Opcode::Flink => SYS_FLINK, + Opcode::Fpath => SYS_FPATH, + Opcode::Frename => SYS_FRENAME, + Opcode::Fstat => SYS_FSTAT, + Opcode::Fstatvfs => SYS_FSTATVFS, + Opcode::Fsync => SYS_FSYNC, + Opcode::Ftruncate => SYS_FTRUNCATE, + Opcode::Futimens => SYS_FUTIMENS, + + Opcode::MmapPrep => { + return Ok(Packet { + id: u64::from(sqe.tag) + 1, + pid: sqe.caller as usize, + a: KSMSG_MMAP_PREP, + b: sqe.args[0] as usize, + c: sqe.args[1] as usize, + d: sqe.args[2] as usize, + uid: sqe.args[3] as u32, + gid: (sqe.args[3] >> 32) as u32, + 
}) + } + Opcode::RequestMmap => { + return Ok(Packet { + id: u64::from(sqe.tag) + 1, + pid: sqe.caller as usize, + a: KSMSG_MMAP, + b: sqe.args[0] as usize, + c: sqe.args[1] as usize, + d: sqe.args[2] as usize, + uid: sqe.args[3] as u32, + gid: (sqe.args[3] >> 32) as u32, + }) + } + Opcode::Munmap => { + return Ok(Packet { + id: u64::from(sqe.tag) + 1, + pid: sqe.caller as usize, + a: KSMSG_MUNMAP, + b: sqe.args[0] as usize, // fd + c: sqe.args[1] as usize, // size + d: sqe.args[2] as usize, // flags + uid: sqe.args[3] as u32, // offset lo + gid: (sqe.args[3] >> 32) as u32, // offset hi + }); + } + Opcode::Getdents => { + return Ok(Packet { + id: u64::from(sqe.tag) + 1, + pid: sqe.caller as usize, + a: SYS_GETDENTS, + b: sqe.args[0] as usize, + c: sqe.args[1] as usize, + d: sqe.args[2] as usize, + uid: sqe.args[3] as u32, + gid: (sqe.args[3] >> 32) as u32, + }); + } + + Opcode::Mremap => SYS_MREMAP, + Opcode::Msync => KSMSG_MSYNC, + + Opcode::Cancel => { + return Ok(Packet { + id: 0, + a: KSMSG_CANCEL, + b: sqe.tag as usize + 1, + c: 0, + d: 0, + pid: sqe.caller as usize, + uid, + gid, + }) + } + + _ => return Err(Error::new(EOPNOTSUPP)), + }, + b: sqe.args[0] as usize, + c: sqe.args[1] as usize, + d: sqe.args[2] as usize, + + uid, + gid, + }) + } + + pub fn write(&self, buf: UserSliceRo) -> Result { + let mut bytes_read = 0; + if self.v2 { + for chunk in buf.in_exact_chunks(size_of::()) { + match ParsedCqe::parse_cqe(&unsafe { chunk.read_exact::()? }) + .and_then(|p| self.handle_parsed(&p)) + { + Ok(()) => bytes_read += size_of::(), + Err(_) if bytes_read > 0 => break, + Err(error) => return Err(error), + } } - } else if self.unmounting.load(Ordering::SeqCst) { - // If we are unmounting and there are no pending requests, return EOF - // Unmounting is read again because the previous value - // may have changed since we first blocked for packets - Ok(0) } else { - // A signal was received, return EINTR - Err(Error::new(EINTR)) + for chunk in buf.in_exact_chunks(size_of::()) { + match ParsedCqe::parse_packet(&unsafe { chunk.read_exact::()? 
}) + .and_then(|p| self.handle_parsed(&p)) + { + Ok(()) => bytes_read += size_of::(), + Err(_) if bytes_read > 0 => break, + Err(error) => return Err(error), + } + } } + Ok(bytes_read) } + pub fn request_fmap( + &self, + id: usize, + _offset: u64, + required_page_count: usize, + flags: MapFlags, + ) -> Result<()> { + log::info!("REQUEST FMAP"); + + let tag = self.next_id()?; + let mut states = self.states.lock(); + states[tag as usize] = State::Fmap(Arc::downgrade(&context::current())); + + /*self.todo.send(Packet { + id: packet_id, + pid: context::context_id().into(), + a: KSMSG_MMAP, + b: id, + c: flags.bits(), + d: required_page_count, + uid: offset as u32, + gid: (offset >> 32) as u32, + });*/ + self.todo.send(Sqe { + opcode: Opcode::RequestMmap as u8, + sqe_flags: SqeFlags::empty(), + _rsvd: 0, + tag, + args: [ + id as u64, + flags.bits() as u64, + required_page_count as u64, + 0, + 0, + uid_gid_hack_merge(current_uid_gid()), + ], + caller: context::current().read().pid as u64, + }); + event::trigger(self.root_id, self.handle_id, EVENT_READ); - pub fn write(&self, buf: &[u8]) -> Result { - let packet_size = mem::size_of::(); - let len = buf.len()/packet_size; - let mut i = 0; - while i < len { - let mut packet = unsafe { *(buf.as_ptr() as *const Packet).offset(i as isize) }; - if packet.id == 0 { - match packet.a { - SYS_FEVENT => event::trigger(self.scheme_id.load(Ordering::SeqCst), packet.b, packet.c), - _ => println!("Unknown scheme -> kernel message {}", packet.a) + Ok(()) + } + fn handle_parsed(&self, cqe: &ParsedCqe) -> Result<()> { + match *cqe { + ParsedCqe::RegularResponse { tag, code, extra0 } => { + self.respond(tag, Response::Regular(code, extra0))? + } + ParsedCqe::ResponseWithFd { tag, fd } => self.respond( + tag, + Response::Fd( + context::current() + .read() + .remove_file(FileHandle::from(fd)) + .ok_or(Error::new(EINVAL))? + .description, + ), + )?, + ParsedCqe::ObtainFd { + tag, + flags, + dst_fd_or_ptr, + } => { + let description = match self + .states + .lock() + .get_mut(tag as usize) + .ok_or(Error::new(EINVAL))? + { + State::Waiting { ref mut fd, .. } => fd.take().ok_or(Error::new(ENOENT))?, + _ => return Err(Error::new(ENOENT)), + }; + + // FIXME: Description can leak if there is no additional file table space. 
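+                // With MANUAL_FD the scheme chose the exact fd slot to install the
+                // description into; otherwise a new fd is allocated and its number is
+                // written back to the userspace pointer supplied in the request.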
+ if flags.contains(FobtainFdFlags::MANUAL_FD) { + context::current().read().insert_file( + FileHandle::from(dst_fd_or_ptr), + FileDescriptor { + description, + cloexec: true, + }, + ); + } else { + let fd = context::current() + .read() + .add_file(FileDescriptor { + description, + cloexec: true, + }) + .ok_or(Error::new(EMFILE))?; + UserSlice::wo(dst_fd_or_ptr, size_of::())?.write_usize(fd.get())?; } - } else { - if let Some((context_weak, desc, map)) = self.fmap.lock().remove(&packet.id) { - if let Ok(address) = Error::demux(packet.a) { - //TODO: Protect against sharing addresses that are not page aligned - let res = UserInner::capture_inner(&context_weak, address, map.size, map.flags, Some(desc)); - if let Ok(new_address) = res { - self.funmap.lock().insert(new_address, address); - } - packet.a = Error::mux(res); + } + ParsedCqe::ProvideMmap { + tag, + offset, + base_addr, + page_count, + } => { + log::info!( + "PROVIDE_MAP {:x} {:x} {:?} {:x}", + tag, + offset, + base_addr, + page_count + ); + + if offset % PAGE_SIZE as u64 != 0 { + return Err(Error::new(EINVAL)); + } + + if base_addr.data() % PAGE_SIZE != 0 { + return Err(Error::new(EINVAL)); + } + + if page_count != 1 { + return Err(Error::new(EINVAL)); + } + + let context = { + let mut states = self.states.lock(); + + match states.get_mut(tag as usize) { + Some(o) => match mem::replace(o, State::Placeholder) { + // invalid state + State::Placeholder => { + return Err(Error::new(EBADFD)); + } + // invalid kernel to scheme call + old_state @ (State::Waiting { .. } | State::Responded(_)) => { + *o = old_state; + return Err(Error::new(EINVAL)); + } + State::Fmap(context) => { + states.remove(tag as usize); + context + } + }, + None => return Err(Error::new(EINVAL)), + } + }; + + let context = context.upgrade().ok_or(Error::new(ESRCH))?; + + let (frame, _) = AddrSpace::current()? + .acquire_read() + .table + .utable + .translate(base_addr) + .ok_or(Error::new(EFAULT))?; + + let mut context = context.write(); + match context.status { + Status::HardBlocked { + reason: HardBlockedReason::AwaitingMmap { .. }, + } => context.status = Status::Runnable, + _ => (), + } + context.fmap_ret = Some(Frame::containing(frame)); + } + ParsedCqe::TriggerFevent { number, flags } => { + event::trigger(self.scheme_id, number, flags) + } + ParsedCqe::NoOp => (), + } + Ok(()) + } + fn respond(&self, tag: u32, mut response: Response) -> Result<()> { + let to_close; + + let mut states = self.states.lock(); + match states.get_mut(tag as usize) { + Some(o) => match mem::replace(o, State::Placeholder) { + // invalid state + State::Placeholder => return Err(Error::new(EBADFD)), + // invalid scheme to kernel call + old_state @ (State::Responded(_) | State::Fmap(_)) => { + *o = old_state; + return Err(Error::new(EINVAL)); + } + + State::Waiting { + context, + fd, + canceling, + callee_responsible, + } => { + // Convert ECANCELED to EINTR if a request was being canceled (currently always + // due to signals). + if let Response::Regular(ref mut code, _) = response + && canceling + && *code == Error::mux(Err(Error::new(ECANCELED))) + { + *code = Error::mux(Err(Error::new(EINTR))); + } + + // TODO: Require ECANCELED? + if let Response::Regular(ref mut code, _) = response + && !canceling + && *code == Error::mux(Err(Error::new(EINTR))) + { + // EINTR is valid after cancelation has been requested, but not otherwise. + // This is because the userspace signal trampoline will be invoked after a + // syscall returns EINTR. 
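+                            // Rewrite the stray EINTR as EIO, since no cancellation (and thus
+                            // no pending signal) exists on the caller's side.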
+ *code = Error::mux(Err(Error::new(EIO))); + } + + to_close = fd + .and_then(|f| Arc::try_unwrap(f).ok()) + .map(RwLock::into_inner); + + if let Some(context) = context.upgrade() { + context.write().unblock(); + *o = State::Responded(response); } else { - let _ = desc.close(); + states.remove(tag as usize); } + + let unpin = true; + AddrSpace::current()?.munmap(callee_responsible, unpin)?; } + }, + // invalid state + None => return Err(Error::new(EBADFD)), + } - self.done.send(packet.id, packet.a); - } - i += 1; + if let Some(to_close) = to_close { + let _ = to_close.try_close(); } + Ok(()) + } - Ok(i * packet_size) + pub fn fevent(&self, flags: EventFlags) -> Result { + // TODO: Should the root scheme also suppress events if `flags` does not contain + // `EVENT_READ`? + Ok(if self.todo.is_currently_empty() { + EventFlags::empty() + } else { + EventFlags::EVENT_READ.intersection(flags) + }) } - pub fn fevent(&self, _flags: usize) -> Result { - Ok(0) + pub fn fsync(&self) -> Result<()> { + Ok(()) } - pub fn fsync(&self) -> Result { - Ok(0) + fn fmap_inner( + &self, + dst_addr_space: Arc, + file: usize, + map: &Map, + ) -> Result { + let unaligned_size = map.size; + + if unaligned_size == 0 { + return Err(Error::new(EINVAL)); + } + + let page_count = unaligned_size.div_ceil(PAGE_SIZE); + + if map.address % PAGE_SIZE != 0 { + return Err(Error::new(EINVAL)); + }; + let dst_base = (map.address != 0) + .then_some(Page::containing_address(VirtualAddress::new(map.address))); + + if map.offset % PAGE_SIZE != 0 { + return Err(Error::new(EINVAL)); + } + + let src_address_space = Arc::clone( + self.context + .upgrade() + .ok_or(Error::new(ENODEV))? + .read() + .addr_space()?, + ); + if Arc::ptr_eq(&src_address_space, &dst_addr_space) { + return Err(Error::new(EBUSY)); + } + + let (pid, desc) = { + let context_lock = context::current(); + let context = context_lock.read(); + // TODO: Faster, cleaner mechanism to get descriptor + let mut desc_res = Err(Error::new(EBADF)); + for context_file in context.files.read().iter().flatten() { + let (context_scheme, context_number) = { + let desc = context_file.description.read(); + (desc.scheme, desc.number) + }; + if context_scheme == self.scheme_id && context_number == file { + desc_res = Ok(context_file.clone()); + break; + } + } + let desc = desc_res?; + (context.pid, desc.description) + }; + + let response = self.call_extended_inner( + None, + /* + Packet { + id: self.next_id(), + pid: pid.into(), + a: KSMSG_MMAP_PREP, + b: file, + c: unaligned_size, + d: map.flags.bits(), + // The uid and gid can be obtained by the proc scheme anyway, if the pid is provided. + uid: map.offset as u32, + #[cfg(target_pointer_width = "64")] + gid: (map.offset >> 32) as u32, + #[cfg(target_pointer_width = "32")] + gid: 0, + }, + */ + Sqe { + opcode: Opcode::MmapPrep as u8, + sqe_flags: SqeFlags::empty(), + _rsvd: 0, + tag: self.next_id()?, + args: [ + file as u64, + unaligned_size as u64, + map.flags.bits() as u64, + map.offset as u64, + 0, + uid_gid_hack_merge(current_uid_gid()), + ], + caller: pid as u64, + }, + &mut PageSpan::empty(), + )?; + + // TODO: I've previously tested that this works, but because the scheme trait all of + // Redox's schemes currently rely on doesn't allow one-way messages, there's no current + // code using it. 
+ + //let mapping_is_lazy = map.flags.contains(MapFlags::MAP_LAZY); + let mapping_is_lazy = false; + + let base_page_opt = match response { + Response::Regular(code, _) => (!mapping_is_lazy).then_some(Error::demux(code)?), + Response::Fd(_) => { + log::debug!("Scheme incorrectly returned an fd for fmap."); + + return Err(Error::new(EIO)); + } + }; + + let file_ref = GrantFileRef { + description: desc, + base_offset: map.offset, + }; + + let src = match base_page_opt { + Some(base_addr) => Some({ + if base_addr % PAGE_SIZE != 0 { + return Err(Error::new(EINVAL)); + } + let addr_space_lock = &src_address_space; + BorrowedFmapSource { + src_base: Page::containing_address(VirtualAddress::new(base_addr)), + addr_space_lock, + addr_space_guard: addr_space_lock.acquire_write(), + mode: if map.flags.contains(MapFlags::MAP_SHARED) { + MmapMode::Shared + } else { + MmapMode::Cow + }, + } + }), + None => None, + }; + + let page_count_nz = NonZeroUsize::new(page_count).expect("already validated map.size != 0"); + let mut notify_files = Vec::new(); + let dst_base = dst_addr_space.acquire_write().mmap( + &dst_addr_space, + dst_base, + page_count_nz, + map.flags, + &mut notify_files, + |dst_base, flags, mapper, flusher| { + Grant::borrow_fmap( + PageSpan::new(dst_base, page_count), + flags, + file_ref, + src, + &dst_addr_space, + mapper, + flusher, + ) + }, + )?; + + for map in notify_files { + let _ = map.unmap(); + } + + Ok(dst_base.start_address().data()) } } +pub struct CaptureGuard { + destroyed: bool, + base: usize, + len: usize, + span: PageSpan, + + head: CopyInfo, + tail: CopyInfo, + addrsp: Option>, +} +impl CaptureGuard { + fn base(&self) -> usize { + self.base + } + fn len(&self) -> usize { + self.len + } + fn span(&mut self) -> &mut PageSpan { + &mut self.span + } +} +struct CopyInfo { + src: Option, + + // TODO + dst: Option>, +} +impl CaptureGuard { + fn release_inner(&mut self) -> Result<()> { + if self.destroyed { + return Ok(()); + } + self.destroyed = true; + + if self.base == DANGLING { + return Ok(()); + } + + // TODO: Encode src and dst better using const generics. 
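+        // Copy any partially covered head and tail pages back from the kernel bounce
+        // buffers into the caller's buffer, then unmap the temporary span from the
+        // scheme's address space below.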
+ if let CopyInfo { + src: Some(ref src), + dst: Some(ref mut dst), + } = self.head + { + dst.copy_from_slice(&src.buf()[self.base % PAGE_SIZE..][..dst.len()])?; + } + if let CopyInfo { + src: Some(ref src), + dst: Some(ref mut dst), + } = self.tail + { + dst.copy_from_slice(&src.buf()[..dst.len()])?; + } + let unpin = true; + if let Some(ref addrsp) = self.addrsp + && !self.span.is_empty() + { + addrsp.munmap(self.span, unpin)?; + } + + Ok(()) + } + pub fn release(mut self) -> Result<()> { + self.release_inner() + } +} +impl Drop for CaptureGuard { + fn drop(&mut self) { + let _ = self.release_inner(); + } +} +/// base..base+size => page..page+page_count*PAGE_SIZE, offset +fn page_range_containing(base: usize, size: usize) -> (Page, usize, usize) { + let first_page = Page::containing_address(VirtualAddress::new(base)); + let offset = base - first_page.start_address().data(); + + (first_page, (size + offset).div_ceil(PAGE_SIZE), offset) +} /// `UserInner` has to be wrapped +#[derive(Clone)] pub struct UserScheme { - inner: Weak + pub(crate) inner: Weak, } impl UserScheme { pub fn new(inner: Weak) -> UserScheme { - UserScheme { - inner: inner - } + UserScheme { inner } } } -impl Scheme for UserScheme { - fn open(&self, path: &[u8], flags: usize, _uid: u32, _gid: u32) -> Result { +impl KernelScheme for UserScheme { + fn kopen(&self, path: &str, flags: usize, ctx: CallerCtx) -> Result { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address = inner.capture(path)?; - let result = inner.call(SYS_OPEN, address, path.len(), flags); - let _ = inner.release(address); - result + let mut address = inner.copy_and_capture_tail(path.as_bytes())?; + match inner.call_extended( + ctx, + None, + Opcode::Open, + [address.base(), address.len(), flags], + address.span(), + )? 
{ + Response::Regular(code, fl) => Ok({ + let _ = Error::demux(code)?; + OpenResult::SchemeLocal( + code, + InternalFlags::from_extra0(fl).ok_or(Error::new(EINVAL))?, + ) + }), + Response::Fd(desc) => Ok(OpenResult::External(desc)), + } + } + fn rmdir(&self, path: &str, _ctx: CallerCtx) -> Result<()> { + let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; + let mut address = inner.copy_and_capture_tail(path.as_bytes())?; + inner.call( + Opcode::Rmdir, + [address.base(), address.len()], + address.span(), + )?; + Ok(()) } - fn chmod(&self, path: &[u8], mode: u16, _uid: u32, _gid: u32) -> Result { + fn unlink(&self, path: &str, _ctx: CallerCtx) -> Result<()> { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address = inner.capture(path)?; - let result = inner.call(SYS_CHMOD, address, path.len(), mode as usize); - let _ = inner.release(address); - result + let mut address = inner.copy_and_capture_tail(path.as_bytes())?; + inner.call( + Opcode::Unlink, + [address.base(), address.len()], + address.span(), + )?; + Ok(()) } - fn rmdir(&self, path: &[u8], _uid: u32, _gid: u32) -> Result { + fn fsize(&self, file: usize) -> Result { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address = inner.capture(path)?; - let result = inner.call(SYS_RMDIR, address, path.len(), 0); - let _ = inner.release(address); - result + if !inner.v2 { + return Err(Error::new(ESPIPE)); + } + inner + .call(Opcode::Fsize, [file], &mut PageSpan::empty()) + .map(|o| o as u64) } - fn unlink(&self, path: &[u8], _uid: u32, _gid: u32) -> Result { + fn fchmod(&self, file: usize, mode: u16) -> Result<()> { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address = inner.capture(path)?; - let result = inner.call(SYS_UNLINK, address, path.len(), 0); - let _ = inner.release(address); - result + inner.call( + Opcode::Fchmod, + [file, mode as usize], + &mut PageSpan::empty(), + )?; + Ok(()) } - fn dup(&self, file: usize, buf: &[u8]) -> Result { + fn fchown(&self, file: usize, uid: u32, gid: u32) -> Result<()> { + match context::current().read() { + ref cx => { + if cx.euid != 0 && (uid != cx.euid || gid != cx.egid) { + return Err(Error::new(EPERM)); + } + } + } + let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address = inner.capture(buf)?; - let result = inner.call(SYS_DUP, file, address, buf.len()); - let _ = inner.release(address); - result + inner.call( + Opcode::Fchown, + [file, uid as usize, gid as usize], + &mut PageSpan::empty(), + )?; + Ok(()) } - fn read(&self, file: usize, buf: &mut [u8]) -> Result { + fn fcntl(&self, file: usize, cmd: usize, arg: usize) -> Result { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address = inner.capture_mut(buf)?; - let result = inner.call(SYS_READ, file, address, buf.len()); - let _ = inner.release(address); - result + inner.call(Opcode::Fcntl, [file, cmd, arg], &mut PageSpan::empty()) } - fn write(&self, file: usize, buf: &[u8]) -> Result { + fn fevent(&self, file: usize, flags: EventFlags) -> Result { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address = inner.capture(buf)?; - let result = inner.call(SYS_WRITE, file, address, buf.len()); - let _ = inner.release(address); - result + inner + .call(Opcode::Fevent, [file, flags.bits()], &mut PageSpan::empty()) + .map(EventFlags::from_bits_truncate) } - fn seek(&self, file: usize, position: usize, whence: usize) -> Result { + fn flink(&self, file: usize, path: &str, _ctx: CallerCtx) -> Result<()> { let inner = 
self.inner.upgrade().ok_or(Error::new(ENODEV))?; - inner.call(SYS_LSEEK, file, position, whence) + let mut address = inner.copy_and_capture_tail(path.as_bytes())?; + inner.call( + Opcode::Flink, + [file, address.base(), address.len()], + address.span(), + )?; + Ok(()) } - fn fchmod(&self, file: usize, mode: u16) -> Result { + fn frename(&self, file: usize, path: &str, _ctx: CallerCtx) -> Result<()> { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - inner.call(SYS_FCHMOD, file, mode as usize, 0) + let mut address = inner.copy_and_capture_tail(path.as_bytes())?; + inner.call( + Opcode::Frename, + [file, address.base(), address.len()], + address.span(), + )?; + Ok(()) } - fn fchown(&self, file: usize, uid: u32, gid: u32) -> Result { + fn fsync(&self, file: usize) -> Result<()> { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - inner.call(SYS_FCHOWN, file, uid as usize, gid as usize) + inner.call(Opcode::Fsync, [file], &mut PageSpan::empty())?; + Ok(()) } - fn fcntl(&self, file: usize, cmd: usize, arg: usize) -> Result { + fn ftruncate(&self, file: usize, len: usize) -> Result<()> { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - inner.call(SYS_FCNTL, file, cmd, arg) + inner.call(Opcode::Ftruncate, [file, len], &mut PageSpan::empty())?; + Ok(()) } - fn fevent(&self, file: usize, flags: usize) -> Result { + fn close(&self, file: usize) -> Result<()> { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - inner.call(SYS_FEVENT, file, flags, 0) + inner.call(Opcode::Close, [file], &mut PageSpan::empty())?; + Ok(()) } + fn on_close(&self, id: usize) -> Result<()> { + let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; + if !inner.supports_on_close { + return self.close(id); + } - fn fmap(&self, file: usize, map: &Map) -> Result { + inner.todo.send(Sqe { + opcode: Opcode::CloseMsg as u8, + sqe_flags: SqeFlags::empty(), + _rsvd: 0, + tag: 0, + args: [id as u64, 0, 0, 0, 0, 0], + caller: 0, // TODO? + }); + Ok(()) + } + fn kdup(&self, file: usize, buf: UserSliceRo, ctx: CallerCtx) -> Result { + let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; + let mut address = inner.capture_user(buf)?; + let result = inner.call_extended( + ctx, + None, + Opcode::Dup, + [file, address.base(), address.len()], + address.span(), + ); + + address.release()?; + + match result? 
{ + Response::Regular(code, fl) => Ok({ + let fd = Error::demux(code)?; + OpenResult::SchemeLocal( + fd, + InternalFlags::from_extra0(fl).ok_or(Error::new(EINVAL))?, + ) + }), + Response::Fd(desc) => Ok(OpenResult::External(desc)), + } + } + fn kfpath(&self, file: usize, buf: UserSliceWo) -> Result { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; + let mut address = inner.capture_user(buf)?; + let result = inner.call( + Opcode::Fpath, + [file, address.base(), address.len()], + address.span(), + ); + address.release()?; + result + } - let (pid, uid, gid, context_lock, desc) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - // TODO: Faster, cleaner mechanism to get descriptor - let scheme = inner.scheme_id.load(Ordering::SeqCst); - let mut desc_res = Err(Error::new(EBADF)); - for context_file_opt in context.files.lock().iter() { - if let Some(context_file) = context_file_opt { - let (context_scheme, context_number) = { - let desc = context_file.description.read(); - (desc.scheme, desc.number) - }; - if context_scheme == scheme && context_number == file { - desc_res = Ok(context_file.clone()); - break; - } - } - } - let desc = desc_res?; - (context.id, context.euid, context.egid, Arc::downgrade(&context_lock), desc) - }; + fn kreadoff( + &self, + file: usize, + buf: UserSliceWo, + offset: u64, + call_flags: u32, + stored_flags: u32, + ) -> Result { + let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address = inner.capture(map)?; + if call_flags != stored_flags && !inner.v2 { + self.fcntl(file, F_SETFL, call_flags as usize)?; + } - let id = inner.next_id.fetch_add(1, Ordering::SeqCst); + let mut address = inner.capture_user(buf)?; + let result = inner.call( + Opcode::Read, + [ + file as u64, + address.base() as u64, + address.len() as u64, + offset, + u64::from(call_flags), + ], + address.span(), + ); + address.release()?; + + if call_flags != stored_flags && !inner.v2 { + self.fcntl(file, F_SETFL, stored_flags as usize)?; + } - inner.fmap.lock().insert(id, (context_lock, desc, *map)); + result + } - let result = inner.call_inner(Packet { - id: id, - pid: pid.into(), - uid: uid, - gid: gid, - a: SYS_FMAP, - b: file, - c: address, - d: mem::size_of::() - }); + fn kwriteoff( + &self, + file: usize, + buf: UserSliceRo, + offset: u64, + call_flags: u32, + stored_flags: u32, + ) -> Result { + let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; + if call_flags != stored_flags && !inner.v2 { + self.fcntl(file, F_SETFL, call_flags as usize)?; + } - let _ = inner.release(address); + let mut address = inner.capture_user(buf)?; + let result = inner.call( + Opcode::Write, + [ + file as u64, + address.base() as u64, + address.len() as u64, + offset, + u64::from(call_flags), + ], + address.span(), + ); + address.release()?; + + if call_flags != stored_flags && !inner.v2 { + self.fcntl(file, F_SETFL, stored_flags as usize)?; + } result } - - fn funmap(&self, new_address: usize) -> Result { - let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address_opt = { - let mut funmap = inner.funmap.lock(); - funmap.remove(&new_address) - }; - if let Some(address) = address_opt { - inner.call(SYS_FUNMAP, address, 0, 0) - } else { - Err(Error::new(EINVAL)) + fn legacy_seek(&self, id: usize, pos: isize, whence: usize) -> Option> { + let inner = self.inner.upgrade()?; + if inner.v2 { + return None; } + Some(inner.call( + Opcode::Fsize, + [id, pos as usize, whence], + 
&mut PageSpan::empty(), + )) } - - fn fpath(&self, file: usize, buf: &mut [u8]) -> Result { + fn kfutimens(&self, file: usize, buf: UserSliceRo) -> Result { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address = inner.capture_mut(buf)?; - let result = inner.call(SYS_FPATH, file, address, buf.len()); - let _ = inner.release(address); + let mut address = inner.capture_user(buf)?; + let result = inner.call( + Opcode::Futimens, + [file, address.base(), address.len()], + address.span(), + ); + address.release()?; result } - - fn frename(&self, file: usize, path: &[u8], _uid: u32, _gid: u32) -> Result { + fn getdents( + &self, + file: usize, + buf: UserSliceWo, + header_size: u16, + opaque_id_start: u64, + ) -> Result { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address = inner.capture(path)?; - let result = inner.call(SYS_FRENAME, file, address, path.len()); - let _ = inner.release(address); + let mut address = inner.capture_user(buf)?; + // TODO: Support passing the 16-byte record_len of the last dent, to make it possible to + // iterate backwards without first interating forward? The last entry will contain the + // opaque id to pass to the next getdents. Since this field is small, this would fit in the + // extra_raw field of `Cqe`s. + let result = inner.call( + Opcode::Getdents, + [ + file, + address.base(), + address.len(), + header_size.into(), + opaque_id_start as usize, + ], + address.span(), + ); + address.release()?; result } - - fn fstat(&self, file: usize, stat: &mut Stat) -> Result { + fn kfstat(&self, file: usize, stat: UserSliceWo) -> Result<()> { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address = inner.capture_mut(stat)?; - let result = inner.call(SYS_FSTAT, file, address, mem::size_of::()); - let _ = inner.release(address); - result + let mut address = inner.capture_user(stat)?; + let result = inner.call( + Opcode::Fstat, + [file, address.base(), address.len()], + address.span(), + ); + address.release()?; + result.map(|_| ()) } - - fn fstatvfs(&self, file: usize, stat: &mut StatVfs) -> Result { + fn kfstatvfs(&self, file: usize, stat: UserSliceWo) -> Result<()> { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address = inner.capture_mut(stat)?; - let result = inner.call(SYS_FSTATVFS, file, address, mem::size_of::()); - let _ = inner.release(address); - result + let mut address = inner.capture_user(stat)?; + let result = inner.call( + Opcode::Fstatvfs, + [file, address.base(), address.len()], + address.span(), + ); + address.release()?; + result.map(|_| ()) } + fn kfmap( + &self, + file: usize, + addr_space: &Arc, + map: &Map, + _consume: bool, + ) -> Result { + let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - fn fsync(&self, file: usize) -> Result { + inner.fmap_inner(Arc::clone(addr_space), file, map) + } + fn kfunmap(&self, number: usize, offset: usize, size: usize, flags: MunmapFlags) -> Result<()> { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - inner.call(SYS_FSYNC, file, 0, 0) + + let ctx = context::current().read().caller_ctx(); + let res = inner.call_extended( + ctx, + None, + Opcode::Munmap, + [number, size, flags.bits(), offset], + &mut PageSpan::empty(), + )?; + + match res { + Response::Regular(_, _) => Ok(()), + Response::Fd(_) => Err(Error::new(EIO)), + } } + fn ksendfd( + &self, + number: usize, + desc: Arc>, + flags: SendFdFlags, + arg: u64, + ) -> Result { + let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - fn ftruncate(&self, 
file: usize, len: usize) -> Result { + let ctx = context::current().read().caller_ctx(); + let res = inner.call_extended( + ctx, + Some(desc), + Opcode::Sendfd, + [number, flags.bits(), arg as usize], + &mut PageSpan::empty(), + )?; + + match res { + Response::Regular(res, _) => Error::demux(res), + Response::Fd(_) => Err(Error::new(EIO)), + } + } + fn kcall( + &self, + id: usize, + payload: UserSliceRw, + _flags: CallFlags, + metadata: &[u64], + ) -> Result { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - inner.call(SYS_FTRUNCATE, file, len, 0) + + let mut address = inner.capture_user(payload)?; + + let mut sqe = Sqe { + opcode: Opcode::Call as u8, + sqe_flags: SqeFlags::empty(), + _rsvd: 0, + tag: inner.next_id()?, + caller: 0, // TODO? + args: [ + id as u64, + address.base() as u64, + address.len() as u64, + 0, + 0, + 0, + ], + }; + { + let dst = &mut sqe.args[3..]; + let len = dst.len().min(metadata.len()); + dst[..len].copy_from_slice(&metadata[..len]); + } + let res = inner.call_extended_inner(None, sqe, &mut address.span())?; + + match res { + Response::Regular(res, _) => Error::demux(res), + Response::Fd(_) => Err(Error::new(EIO)), + } } +} - fn futimens(&self, file: usize, times: &[TimeSpec]) -> Result { - let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let buf = unsafe { slice::from_raw_parts(times.as_ptr() as *const u8, mem::size_of::() * times.len()) }; - let address = inner.capture(buf)?; - let result = inner.call(SYS_FUTIMENS, file, address, buf.len()); - let _ = inner.release(address); - result +pub trait Args: Copy { + fn args(self) -> [u64; 6]; +} +impl Args for [u64; N] { + fn args(self) -> [u64; 6] { + assert!(self.len() <= N); + core::array::from_fn(|i| self.get(i).copied().unwrap_or(0)) + } +} +impl Args for [usize; N] { + fn args(self) -> [u64; 6] { + self.map(|s| s as u64).args() } +} - fn close(&self, file: usize) -> Result { - let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - inner.call(SYS_CLOSE, file, 0, 0) +// TODO: Find a better way to do authentication. No scheme call currently uses arg 5 but this will +// likely change. Ideally this mechanism would also allow the scheme to query the supplementary +// group list. 
+fn uid_gid_hack_merge([uid, gid]: [u32; 2]) -> u64 { + u64::from(uid) | (u64::from(gid) << 32) +} +fn current_uid_gid() -> [u32; 2] { + match context::current().read() { + ref p => [p.euid, p.egid], } } diff --git a/src/startup/memory.rs b/src/startup/memory.rs new file mode 100644 index 00000000..59ed8dc7 --- /dev/null +++ b/src/startup/memory.rs @@ -0,0 +1,441 @@ +use crate::{ + arch::{consts::KERNEL_OFFSET, paging::entry::EntryFlags, rmm::page_flags, CurrentRmmArch}, + memory::PAGE_SIZE, + startup::memory::BootloaderMemoryKind::Null, +}; +use core::{ + cell::SyncUnsafeCell, + cmp::{max, min}, + mem, + slice::{self, Iter}, +}; +use rmm::{ + Arch, BumpAllocator, MemoryArea, PageFlags, PageMapper, PhysicalAddress, TableKind, + VirtualAddress, KILOBYTE, MEGABYTE, +}; + +// Keep synced with OsMemoryKind in bootloader +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[repr(u64)] +#[allow(dead_code)] +pub enum BootloaderMemoryKind { + Null = 0, + Free = 1, + Reclaim = 2, + Reserved = 3, + + // These are local to kernel + Kernel = 0x100, + Device = 0x101, + IdentityMap = 0x102, +} + +// Keep synced with OsMemoryEntry in bootloader +#[derive(Clone, Copy, Debug)] +#[repr(C, packed(8))] +struct BootloaderMemoryEntry { + pub base: u64, + pub size: u64, + pub kind: BootloaderMemoryKind, +} + +#[derive(Clone, Copy, Debug)] +struct MemoryEntry { + pub start: usize, + pub end: usize, + pub kind: BootloaderMemoryKind, +} + +impl MemoryEntry { + fn intersect(&self, other: &Self) -> Option { + let start = max(self.start, other.start); + let end = min(self.end, other.end); + if start < end { + Some(Self { + start, + end, + kind: self.kind, + }) + } else { + None + } + } + + fn combine(&self, other: &Self) -> Option { + if self.start <= other.end && self.end >= other.start { + Some(Self { + start: min(self.start, other.start), + end: max(self.end, other.end), + kind: self.kind, + }) + } else { + None + } + } +} + +struct MemoryMap { + entries: [MemoryEntry; 512], + size: usize, +} + +impl MemoryMap { + fn register(&mut self, base: usize, size: usize, kind: BootloaderMemoryKind) { + if self.size >= self.entries.len() { + panic!("Early memory map overflow!"); + } + let start = if kind == BootloaderMemoryKind::Free { + align_up(base) + } else { + align_down(base) + }; + let end = base.saturating_add(size); + let end = if kind == BootloaderMemoryKind::Free { + align_down(end) + } else { + align_up(end) + }; + if start < end { + self.entries[self.size] = MemoryEntry { start, end, kind }; + self.size += 1; + } + } + + fn iter(&self) -> Iter { + return self.entries[0..self.size].iter(); + } + + pub fn free(&self) -> impl Iterator { + self.iter().filter(|x| x.kind == BootloaderMemoryKind::Free) + } + + pub fn non_free(&self) -> impl Iterator { + self.iter().filter(|x| x.kind != BootloaderMemoryKind::Free) + } + + pub fn kernel(&self) -> Option<&MemoryEntry> { + self.iter().find(|x| x.kind == BootloaderMemoryKind::Kernel) + } + + pub fn devices(&self) -> impl Iterator { + self.iter() + .filter(|x| x.kind == BootloaderMemoryKind::Device) + } + + pub fn identity_mapped(&self) -> impl Iterator { + self.iter() + .filter(|x| x.kind == BootloaderMemoryKind::IdentityMap) + } +} + +static MEMORY_MAP: SyncUnsafeCell = SyncUnsafeCell::new(MemoryMap { + entries: [MemoryEntry { + start: 0, + end: 0, + kind: BootloaderMemoryKind::Null, + }; 512], + size: 0, +}); + +fn align_up(x: usize) -> usize { + (x.saturating_add(PAGE_SIZE - 1) / PAGE_SIZE) * PAGE_SIZE +} +fn align_down(x: usize) -> usize { + x / PAGE_SIZE * PAGE_SIZE +} + 
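// Editor's sketch (illustrative only, not part of this patch): demonstrates the
// alignment policy of `MemoryMap::register` using the `align_up`/`align_down`
// helpers above. Free regions are shrunk inward to page boundaries so free memory
// is never over-reported, while every other kind is grown outward so reservations
// are never under-covered. The hex values assume PAGE_SIZE == 4096.
#[allow(dead_code)]
fn register_alignment_demo() {
    let mut map = MemoryMap {
        entries: [MemoryEntry {
            start: 0,
            end: 0,
            kind: BootloaderMemoryKind::Null,
        }; 512],
        size: 0,
    };

    // Unaligned free region: the usable span is rounded inward.
    map.register(0x1234, 0x3000, BootloaderMemoryKind::Free);
    // Unaligned reserved region: the covered span is rounded outward.
    map.register(0x1234, 0x3000, BootloaderMemoryKind::Reserved);

    let free = map.free().next().unwrap();
    debug_assert_eq!((free.start, free.end), (0x2000, 0x4000));

    let reserved = map.non_free().next().unwrap();
    debug_assert_eq!((reserved.start, reserved.end), (0x1000, 0x5000));
}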
+pub fn register_memory_region(base: usize, size: usize, kind: BootloaderMemoryKind) { + if kind != Null && size != 0 { + log::debug!("Registering {:?} memory {:X} size {:X}", kind, base, size); + unsafe { (*MEMORY_MAP.get()).register(base, size, kind) } + } +} + +pub fn register_bootloader_areas(areas_base: usize, areas_size: usize) { + let bootloader_areas = unsafe { + slice::from_raw_parts( + areas_base as *const BootloaderMemoryEntry, + areas_size / mem::size_of::(), + ) + }; + for bootloader_area in bootloader_areas.iter() { + register_memory_region( + bootloader_area.base as usize, + bootloader_area.size as usize, + bootloader_area.kind, + ) + } +} + +unsafe fn add_memory(areas: &mut [MemoryArea], area_i: &mut usize, mut area: MemoryEntry) { + for reservation in (*MEMORY_MAP.get()).non_free() { + if area.end > reservation.start && area.end <= reservation.end { + log::info!( + "Memory {:X}:{:X} overlaps with reservation {:X}:{:X}", + area.start, + area.end, + reservation.start, + reservation.end + ); + area.end = reservation.start; + } + if area.start >= area.end { + return; + } + + if area.start >= reservation.start && area.start < reservation.end { + log::info!( + "Memory {:X}:{:X} overlaps with reservation {:X}:{:X}", + area.start, + area.end, + reservation.start, + reservation.end + ); + area.start = reservation.end; + } + if area.start >= area.end { + return; + } + + if area.start <= reservation.start && area.end > reservation.start { + log::info!( + "Memory {:X}:{:X} contains reservation {:X}:{:X}", + area.start, + area.end, + reservation.start, + reservation.end + ); + debug_assert!(area.start < reservation.start && reservation.end < area.end, + "Should've contained reservation entirely: memory block {:X}:{:X} reservation {:X}:{:X}", + area.start, area.end, + reservation.start, reservation.end + ); + // recurse on first part of split memory block + + add_memory( + areas, + area_i, + MemoryEntry { + end: reservation.start, + ..area + }, + ); + + // and continue with the second part + area.start = reservation.end; + } + debug_assert!( + area.intersect(reservation).is_none(), + "Intersects with reservation! 
memory block {:X}:{:X} reservation {:X}:{:X}", + area.start, + area.end, + reservation.start, + reservation.end + ); + debug_assert!( + area.start < area.end, + "Empty memory block {:X}:{:X}", + area.start, + area.end + ); + } + + // Combine overlapping memory areas + let mut other_i = 0; + while other_i < *area_i { + let other = &areas[other_i]; + let other = MemoryEntry { + start: other.base.data(), + end: other.base.data().saturating_add(other.size), + kind: BootloaderMemoryKind::Free, + }; + if let Some(union) = area.combine(&other) { + log::debug!( + "{:X}:{:X} overlaps with area {:X}:{:X}, combining into {:X}:{:X}", + area.start, + area.end, + other.start, + other.end, + union.start, + union.end + ); + area = union; + *area_i -= 1; // delete the original memory chunk + areas[other_i] = areas[*area_i]; + } else { + other_i += 1; + } + } + + areas[*area_i].base = PhysicalAddress::new(area.start); + areas[*area_i].size = area.end - area.start; + *area_i += 1; +} + +unsafe fn map_memory(areas: &[MemoryArea], mut bump_allocator: &mut BumpAllocator) { + let mut mapper = PageMapper::::create(TableKind::Kernel, &mut bump_allocator) + .expect("failed to create Mapper"); + + #[cfg(target_arch = "i686")] + { + // Pre-allocate all kernel PD entries so that when the page table is copied, + // these entries are synced between processes + for i in 512..1024 { + let phys = mapper + .allocator_mut() + .allocate_one() + .expect("failed to map page table"); + let flags = A::ENTRY_FLAG_READWRITE | A::ENTRY_FLAG_DEFAULT_TABLE; + mapper + .table() + .set_entry(i, PageEntry::new(phys.data(), flags)); + } + } + + // Map all physical areas at PHYS_OFFSET + for area in areas.iter() { + for i in 0..area.size / PAGE_SIZE { + let phys = area.base.add(i * PAGE_SIZE); + let virt = A::phys_to_virt(phys); + let flags = page_flags::(virt); + let flush = mapper + .map_phys(virt, phys, flags) + .expect("failed to map frame"); + flush.ignore(); // Not the active table + } + } + + let kernel_area = (*MEMORY_MAP.get()).kernel().unwrap(); + let kernel_base = kernel_area.start; + let kernel_size = kernel_area.end - kernel_area.start; + // Map kernel at KERNEL_OFFSET and identity map too + for i in 0..kernel_size / A::PAGE_SIZE { + let phys = PhysicalAddress::new(kernel_base + i * PAGE_SIZE); + let virt = VirtualAddress::new(KERNEL_OFFSET + i * PAGE_SIZE); + let flags = page_flags::(virt); + let flush = mapper + .map_phys(virt, phys, flags) + .expect("failed to map frame"); + flush.ignore(); // Not the active table + + let virt = A::phys_to_virt(phys); + let flush = mapper + .map_phys(virt, phys, flags) + .expect("failed to map frame"); + flush.ignore(); // Not the active table + } + + for area in (*MEMORY_MAP.get()).identity_mapped() { + let base = area.start; + let size = area.end - area.start; + for i in 0..size / PAGE_SIZE { + let phys = PhysicalAddress::new(base + i * PAGE_SIZE); + let virt = A::phys_to_virt(phys); + let flags = page_flags::(virt); + let flush = mapper + .map_phys(virt, phys, flags) + .expect("failed to map frame"); + flush.ignore(); // Not the active table + } + } + + //map dev mem + for area in (*MEMORY_MAP.get()).devices() { + let base = area.start; + let size = area.end - area.start; + for i in 0..size / PAGE_SIZE { + let phys = PhysicalAddress::new(base + i * PAGE_SIZE); + let virt = A::phys_to_virt(phys); + // use the same mair_el1 value with bootloader, + // mair_el1 == 0x00000000000044FF + // set mem_attr == device memory + let flags = page_flags::(virt).custom_flag(EntryFlags::DEV_MEM.bits(), 
true); + let flush = mapper + .map_phys(virt, phys, flags) + .expect("failed to map frame"); + flush.ignore(); // Not the active table + } + } + + // Ensure graphical debug region remains paged + #[cfg(feature = "graphical_debug")] + { + use crate::devices::graphical_debug::FRAMEBUFFER; + + let (phys, virt, size) = *FRAMEBUFFER.lock(); + + let pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + for i in 0..pages { + let phys = PhysicalAddress::new(phys + i * PAGE_SIZE); + let virt = VirtualAddress::new(virt + i * PAGE_SIZE); + let flags = PageFlags::new().write(true).write_combining(true); + let flush = mapper + .map_phys(virt, phys, flags) + .expect("failed to map frame"); + flush.ignore(); // Not the active table + } + } + + log::debug!("Table: {:X}", mapper.table().phys().data()); + for i in 0..A::PAGE_ENTRIES { + if let Some(entry) = mapper.table().entry(i) { + if entry.present() { + log::debug!("{}: {:X}", i, entry.data()); + } + } + } + + // Use the new table + mapper.make_current(); +} + +pub unsafe fn init(low_limit: Option, high_limit: Option) { + let physmem_limit = MemoryEntry { + start: align_up(low_limit.unwrap_or(0)), + end: align_down(high_limit.unwrap_or(usize::MAX)), + kind: BootloaderMemoryKind::Free, + }; + + let areas = &mut *crate::memory::AREAS.get(); + let mut area_i = 0; + + // Copy initial memory map, and page align it + for area in (*MEMORY_MAP.get()).free() { + log::debug!("{:X}:{:X}", area.start, area.end); + + if let Some(area) = area.intersect(&physmem_limit) { + add_memory(areas, &mut area_i, area); + } + } + + areas[..area_i].sort_unstable_by_key(|area| area.base); + crate::memory::AREA_COUNT.get().write(area_i as u16); + + // free memory map in now ready + let areas = crate::memory::areas(); + + // First, calculate how much memory we have + let mut size = 0; + for area in areas.iter() { + if area.size > 0 { + log::debug!("{:X?}", area); + size += area.size; + } + } + + log::info!("Memory: {} MB", (size + (MEGABYTE - 1)) / MEGABYTE); + + // Create a basic allocator for the first pages + let mut bump_allocator = BumpAllocator::::new(areas, 0); + + map_memory(areas, &mut bump_allocator); + + // Create the physical memory map + let offset = bump_allocator.offset(); + log::info!( + "Permanently used: {} KB", + (offset + (KILOBYTE - 1)) / KILOBYTE + ); + + crate::memory::init_mm(bump_allocator); +} diff --git a/src/startup/mod.rs b/src/startup/mod.rs new file mode 100644 index 00000000..eb291915 --- /dev/null +++ b/src/startup/mod.rs @@ -0,0 +1 @@ +pub mod memory; diff --git a/src/sync/mod.rs b/src/sync/mod.rs index 833925b1..74a79956 100644 --- a/src/sync/mod.rs +++ b/src/sync/mod.rs @@ -1,7 +1,4 @@ -pub use self::wait_condition::WaitCondition; -pub use self::wait_queue::WaitQueue; -pub use self::wait_map::WaitMap; +pub use self::{wait_condition::WaitCondition, wait_queue::WaitQueue}; pub mod wait_condition; pub mod wait_queue; -pub mod wait_map; diff --git a/src/sync/wait_condition.rs b/src/sync/wait_condition.rs index 2b68f9d9..57535edd 100644 --- a/src/sync/wait_condition.rs +++ b/src/sync/wait_condition.rs @@ -1,18 +1,21 @@ -use alloc::sync::Arc; -use alloc::vec::Vec; -use spin::{Mutex, RwLock}; +use alloc::{ + sync::{Arc, Weak}, + vec::Vec, +}; +use spin::Mutex; +use spinning_top::RwSpinlock; use crate::context::{self, Context}; #[derive(Debug)] pub struct WaitCondition { - contexts: Mutex>>> + contexts: Mutex>>>, } impl WaitCondition { - pub fn new() -> WaitCondition { + pub const fn new() -> WaitCondition { WaitCondition { - contexts: Mutex::new(Vec::new()) + 
contexts: Mutex::new(Vec::new()), } } @@ -20,8 +23,10 @@ impl WaitCondition { pub fn notify(&self) -> usize { let mut contexts = self.contexts.lock(); let len = contexts.len(); - while let Some(context_lock) = contexts.pop() { - context_lock.write().unblock(); + while let Some(context_weak) = contexts.pop() { + if let Some(context_ref) = context_weak.upgrade() { + context_ref.write().unblock(); + } } len } @@ -30,46 +35,46 @@ impl WaitCondition { pub unsafe fn notify_signal(&self) -> usize { let contexts = self.contexts.lock(); let len = contexts.len(); - for context_lock in contexts.iter() { - context_lock.write().unblock(); + for context_weak in contexts.iter() { + if let Some(context_ref) = context_weak.upgrade() { + context_ref.write().unblock(); + } } len } - // Wait until notified. Returns false if resumed by a signal or the notify_signal function - pub fn wait(&self) -> bool { - let id; + // Wait until notified. Unlocks guard when blocking is ready. Returns false if resumed by a signal or the notify_signal function + pub fn wait(&self, guard: T, reason: &'static str) -> bool { + let current_context_ref = context::current(); { - let context_lock = { - let contexts = context::contexts(); - let context_lock = contexts.current().expect("WaitCondition::wait: no context"); - Arc::clone(&context_lock) - }; - { - let mut context = context_lock.write(); - id = context.id; - context.block(); + let mut context = current_context_ref.write(); + if let Some((control, pctl, _)) = context.sigcontrol() + && control.currently_pending_unblocked(pctl) != 0 + { + return false; + } + context.block(reason); } - self.contexts.lock().push(context_lock); + self.contexts + .lock() + .push(Arc::downgrade(¤t_context_ref)); + + drop(guard); } - unsafe { context::switch(); } + context::switch(); let mut waited = true; { let mut contexts = self.contexts.lock(); + // TODO: retain let mut i = 0; while i < contexts.len() { - let remove = { - let context = contexts[i].read(); - context.id == id - }; - - if remove { + if Weak::as_ptr(&contexts[i]) == Arc::as_ptr(¤t_context_ref) { contexts.remove(i); waited = false; break; @@ -84,7 +89,7 @@ impl WaitCondition { } impl Drop for WaitCondition { - fn drop(&mut self){ + fn drop(&mut self) { unsafe { self.notify_signal() }; } } diff --git a/src/sync/wait_map.rs b/src/sync/wait_map.rs deleted file mode 100644 index 8f683373..00000000 --- a/src/sync/wait_map.rs +++ /dev/null @@ -1,63 +0,0 @@ -use alloc::collections::BTreeMap; -use core::mem; -use spin::Mutex; - -use crate::sync::WaitCondition; - -#[derive(Debug)] -pub struct WaitMap { - pub inner: Mutex>, - pub condition: WaitCondition -} - -impl WaitMap where K: Clone + Ord { - pub fn new() -> WaitMap { - WaitMap { - inner: Mutex::new(BTreeMap::new()), - condition: WaitCondition::new() - } - } - - pub fn receive_nonblock(&self, key: &K) -> Option { - self.inner.lock().remove(key) - } - - pub fn receive(&self, key: &K) -> V { - loop { - if let Some(value) = self.receive_nonblock(key) { - return value; - } - //TODO: use false from wait condition to indicate EINTR - let _ = self.condition.wait(); - } - } - - pub fn receive_any_nonblock(&self) -> Option<(K, V)> { - let mut inner = self.inner.lock(); - if let Some(key) = inner.keys().next().cloned() { - inner.remove(&key).map(|value| (key, value)) - } else { - None - } - } - - pub fn receive_any(&self) -> (K, V) { - loop { - if let Some(entry) = self.receive_any_nonblock() { - return entry; - } - let _ = self.condition.wait(); - } - } - - pub fn receive_all(&self) -> BTreeMap { - 
let mut ret = BTreeMap::new(); - mem::swap(&mut ret, &mut *self.inner.lock()); - ret - } - - pub fn send(&self, key: K, value: V) { - self.inner.lock().insert(key, value); - self.condition.notify(); - } -} diff --git a/src/sync/wait_queue.rs b/src/sync/wait_queue.rs index 7ec622a4..fa9d7962 100644 --- a/src/sync/wait_queue.rs +++ b/src/sync/wait_queue.rs @@ -1,7 +1,14 @@ use alloc::collections::VecDeque; use spin::Mutex; +use syscall::{EAGAIN, EINTR}; -use crate::sync::WaitCondition; +use crate::{ + sync::WaitCondition, + syscall::{ + error::{Error, Result, EINVAL}, + usercopy::UserSliceWo, + }, +}; #[derive(Debug)] pub struct WaitQueue { @@ -10,56 +17,82 @@ pub struct WaitQueue { } impl WaitQueue { - pub fn new() -> WaitQueue { + pub const fn new() -> WaitQueue { WaitQueue { inner: Mutex::new(VecDeque::new()), - condition: WaitCondition::new() + condition: WaitCondition::new(), } } - - pub fn clone(&self) -> WaitQueue where T: Clone { - WaitQueue { - inner: Mutex::new(self.inner.lock().clone()), - condition: WaitCondition::new() - } - } - - pub fn is_empty(&self) -> bool { + pub fn is_currently_empty(&self) -> bool { self.inner.lock().is_empty() } - pub fn receive(&self) -> Option { + pub fn receive(&self, block: bool, reason: &'static str) -> Result { loop { - if let Some(value) = self.inner.lock().pop_front() { - return Some(value); - } - if ! self.condition.wait() { - return None; + let mut inner = self.inner.lock(); + + if let Some(t) = inner.pop_front() { + return Ok(t); + } else if block { + if !self.condition.wait(inner, reason) { + return Err(Error::new(EINTR)); + } + continue; + } else { + return Err(Error::new(EAGAIN)); } } } - pub fn receive_into(&self, buf: &mut [T], block: bool) -> Option { - let mut i = 0; - - if i < buf.len() && block { - buf[i] = self.receive()?; - i += 1; - } - - { + pub fn receive_into_user( + &self, + buf: UserSliceWo, + block: bool, + reason: &'static str, + ) -> Result { + loop { let mut inner = self.inner.lock(); - while i < buf.len() { - if let Some(value) = inner.pop_front() { - buf[i] = value; - i += 1; + + if inner.is_empty() { + if block { + if !self.condition.wait(inner, reason) { + return Err(Error::new(EINTR)); + } + continue; + } else if buf.is_empty() { + return Ok(0); + } else if buf.len() < core::mem::size_of::() { + return Err(Error::new(EINVAL)); } else { - break; + // TODO: EWOULDBLOCK? 
+ return Err(Error::new(EAGAIN)); } } - } - Some(i) + let (s1, s2) = inner.as_slices(); + let s1_bytes = unsafe { + core::slice::from_raw_parts( + s1.as_ptr().cast::(), + s1.len() * core::mem::size_of::(), + ) + }; + let s2_bytes = unsafe { + core::slice::from_raw_parts( + s2.as_ptr().cast::(), + s2.len() * core::mem::size_of::(), + ) + }; + + let mut bytes_copied = buf.copy_common_bytes_from_slice(s1_bytes)?; + + if let Some(buf_for_s2) = buf.advance(s1_bytes.len()) { + bytes_copied += buf_for_s2.copy_common_bytes_from_slice(s2_bytes)?; + } + + let _ = inner.drain(..bytes_copied / core::mem::size_of::()); + + return Ok(bytes_copied); + } } pub fn send(&self, value: T) -> usize { @@ -71,14 +104,4 @@ impl WaitQueue { self.condition.notify(); len } - - pub fn send_from(&self, buf: &[T]) -> usize where T: Copy { - let len = { - let mut inner = self.inner.lock(); - inner.extend(buf.iter()); - inner.len() - }; - self.condition.notify(); - len - } } diff --git a/src/syscall/debug.rs b/src/syscall/debug.rs index 94f45528..a279c357 100644 --- a/src/syscall/debug.rs +++ b/src/syscall/debug.rs @@ -1,13 +1,17 @@ +use alloc::{string::String, vec::Vec}; use core::{ascii, mem}; -use alloc::string::String; -use alloc::vec::Vec; -use super::data::{Map, Stat, TimeSpec}; -use super::flag::*; -use super::number::*; -use super::validate::*; +use super::{ + copy_path_to_buf, + data::{Map, Stat, TimeSpec}, + flag::*, + number::*, + usercopy::UserSlice, +}; -struct ByteStr<'a>(&'a[u8]); +use crate::syscall::error::Result; + +struct ByteStr<'a>(&'a [u8]); impl<'a> ::core::fmt::Debug for ByteStr<'a> { fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result { @@ -21,53 +25,68 @@ impl<'a> ::core::fmt::Debug for ByteStr<'a> { Ok(()) } } +fn debug_path(ptr: usize, len: usize) -> Result { + // TODO: PATH_MAX + UserSlice::ro(ptr, len).and_then(|slice| copy_path_to_buf(slice, 4096)) +} +fn debug_buf(ptr: usize, len: usize) -> Result> { + UserSlice::ro(ptr, len).and_then(|user| { + let mut buf = vec![0_u8; 4096]; + let count = user.copy_common_bytes_to_slice(&mut buf)?; + buf.truncate(count); + Ok(buf) + }) +} +unsafe fn read_struct(ptr: usize) -> Result { + UserSlice::ro(ptr, mem::size_of::()).and_then(|slice| slice.read_exact::()) +} - +//TODO: calling format_call with arguments from another process space will not work pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) -> String { match a { SYS_OPEN => format!( "open({:?}, {:#X})", - validate_slice(b as *const u8, c).map(ByteStr), - d - ), - SYS_CHMOD => format!( - "chmod({:?}, {:#o})", - validate_slice(b as *const u8, c).map(ByteStr), + debug_path(b, c).as_ref().map(|p| ByteStr(p.as_bytes())), d ), SYS_RMDIR => format!( "rmdir({:?})", - validate_slice(b as *const u8, c).map(ByteStr) + debug_path(b, c).as_ref().map(|p| ByteStr(p.as_bytes())), ), SYS_UNLINK => format!( "unlink({:?})", - validate_slice(b as *const u8, c).map(ByteStr) - ), - SYS_CLOSE => format!( - "close({})", b + debug_path(b, c).as_ref().map(|p| ByteStr(p.as_bytes())), ), + SYS_CLOSE => format!("close({})", b), SYS_DUP => format!( "dup({}, {:?})", b, - validate_slice(c as *const u8, d).map(ByteStr) + debug_buf(c, d).as_ref().map(|b| ByteStr(&*b)), ), SYS_DUP2 => format!( "dup2({}, {}, {:?})", b, c, - validate_slice(d as *const u8, e).map(ByteStr) + debug_buf(d, e).as_ref().map(|b| ByteStr(&*b)), ), - SYS_READ => format!( - "read({}, {:#X}, {})", + SYS_SENDFD => format!("sendfd({}, {}, {:#0x} {:#0x} {:#0x})", b, c, d, e, f,), + SYS_READ => format!("read({}, {:#X}, 
{})", b, c, d), + SYS_READ2 => format!( + "read2({}, {:#X}, {}, {}, {:?})", b, c, - d + d, + e, + (f != usize::MAX).then_some(RwFlags::from_bits_retain(f as u32)) ), - SYS_WRITE => format!( - "write({}, {:#X}, {})", + SYS_WRITE => format!("write({}, {:#X}, {})", b, c, d), + SYS_WRITE2 => format!( + "write2({}, {:#X}, {}, {}, {:?})", b, c, - d + d, + e, + (f != usize::MAX).then_some(RwFlags::from_bits_retain(f as u32)) ), SYS_LSEEK => format!( "lseek({}, {}, {} ({}))", @@ -77,21 +96,12 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - SEEK_SET => "SEEK_SET", SEEK_CUR => "SEEK_CUR", SEEK_END => "SEEK_END", - _ => "UNKNOWN" + _ => "UNKNOWN", }, d ), - SYS_FCHMOD => format!( - "fchmod({}, {:#o})", - b, - c - ), - SYS_FCHOWN => format!( - "fchown({}, {}, {})", - b, - c, - d - ), + SYS_FCHMOD => format!("fchmod({}, {:#o})", b, c), + SYS_FCHOWN => format!("fchown({}, {}, {})", b, c, d), SYS_FCNTL => format!( "fcntl({}, {} ({}), {:#X})", b, @@ -101,7 +111,7 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - F_SETFD => "F_SETFD", F_SETFL => "F_SETFL", F_GETFL => "F_GETFL", - _ => "UNKNOWN" + _ => "UNKNOWN", }, c, d @@ -109,226 +119,164 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - SYS_FMAP => format!( "fmap({}, {:?})", b, - validate_slice( - c as *const Map, - d/mem::size_of::() - ), - ), - SYS_FUNMAP => format!( - "funmap({:#X})", - b - ), - SYS_FPATH => format!( - "fpath({}, {:#X}, {})", - b, - c, - d - ), - SYS_FRENAME => format!( - "frename({}, {:?})", - b, - validate_slice(c as *const u8, d).map(ByteStr), + UserSlice::ro(c, d).and_then(|buf| unsafe { buf.read_exact::() }), ), + SYS_FUNMAP => format!("funmap({:#X}, {:#X})", b, c,), + SYS_FLINK => format!("flink({}, {:?})", b, debug_path(c, d),), + SYS_FPATH => format!("fpath({}, {:#X}, {})", b, c, d), + SYS_FRENAME => format!("frename({}, {:?})", b, debug_path(c, d),), SYS_FSTAT => format!( "fstat({}, {:?})", b, - validate_slice( - c as *const Stat, - d/mem::size_of::() - ), - ), - SYS_FSTATVFS => format!( - "fstatvfs({}, {:#X}, {})", - b, - c, - d - ), - SYS_FSYNC => format!( - "fsync({})", - b - ), - SYS_FTRUNCATE => format!( - "ftruncate({}, {})", - b, - c + UserSlice::ro(c, d).and_then(|buf| unsafe { buf.read_exact::() }), ), + SYS_FSTATVFS => format!("fstatvfs({}, {:#X}, {})", b, c, d), + SYS_FSYNC => format!("fsync({})", b), + SYS_FTRUNCATE => format!("ftruncate({}, {})", b, c), SYS_FUTIMENS => format!( "futimens({}, {:?})", b, - validate_slice( - c as *const TimeSpec, - d/mem::size_of::() - ), - ), + UserSlice::ro(c, d).and_then(|buf| { + let mut times = vec![unsafe { buf.read_exact::()? }]; - SYS_BRK => format!( - "brk({:#X})", - b - ), - SYS_CHDIR => format!( - "chdir({:?})", - validate_slice(b as *const u8, c).map(ByteStr) - ), - SYS_CLOCK_GETTIME => format!( - "clock_gettime({}, {:?})", - b, - validate_slice_mut(c as *mut TimeSpec, 1) - ), - SYS_CLONE => format!( - "clone({})", - b - ), - SYS_EXIT => format!( - "exit({})", - b - ), - //TODO: Cleanup, do not allocate - SYS_FEXEC => format!( - "fexec({}, {:?}, {:?})", - b, - validate_slice( - c as *const [usize; 2], - d - ).map(|slice| { - slice.iter().map(|a| - validate_slice(a[0] as *const u8, a[1]).ok() - .and_then(|s| ::core::str::from_utf8(s).ok()) - ).collect::>>() + // One or two timespecs + if let Some(second) = buf.advance(mem::size_of::()) { + times.push(unsafe { second.read_exact::()? 
}); + } + Ok(times) }), - validate_slice( - e as *const [usize; 2], - f - ).map(|slice| { - slice.iter().map(|a| - validate_slice(a[0] as *const u8, a[1]).ok() - .and_then(|s| ::core::str::from_utf8(s).ok()) - ).collect::>>() - }) ), + SYS_CALL => format!( + "call({b}, {c:x}+{d}, {:?}, {:0x?}", + CallFlags::from_bits_retain(e & !0xff), + // TODO: u64 + UserSlice::ro(f, (e & 0xff) * 8) + .and_then(|buf| buf.usizes().collect::>>()), + ), + + SYS_CLOCK_GETTIME => format!("clock_gettime({}, {:?})", b, unsafe { + read_struct::(c) + }), SYS_FUTEX => format!( "futex({:#X} [{:?}], {}, {}, {}, {})", b, - validate_slice_mut(b as *mut i32, 1).map(|uaddr| &mut uaddr[0]), + UserSlice::ro(b, 4).and_then(|buf| buf.read_u32()), c, d, e, f ), - SYS_GETCWD => format!( - "getcwd({:#X}, {})", - b, - c - ), - SYS_GETEGID => format!("getegid()"), - SYS_GETENS => format!("getens()"), - SYS_GETEUID => format!("geteuid()"), - SYS_GETGID => format!("getgid()"), - SYS_GETNS => format!("getns()"), - SYS_GETPGID => format!("getpgid()"), - SYS_GETPID => format!("getpid()"), - SYS_GETPPID => format!("getppid()"), - SYS_GETUID => format!("getuid()"), - SYS_IOPL => format!( - "iopl({})", - b - ), - SYS_KILL => format!( - "kill({}, {})", - b, - c - ), - SYS_SIGRETURN => format!("sigreturn()"), - SYS_SIGACTION => format!( - "sigaction({}, {:#X}, {:#X}, {:#X})", - b, - c, - d, - e - ), - SYS_SIGPROCMASK => format!( - "sigprocmask({}, {:?}, {:?})", - b, - validate_slice(c as *const [u64; 2], 1), - validate_slice(d as *const [u64; 2], 1) - ), SYS_MKNS => format!( - "mkns({:?})", - validate_slice(b as *const [usize; 2], c) - ), - SYS_MPROTECT => format!( - "mprotect({:#X}, {}, {:#X})", - b, + "mkns({:p} len: {})", + // TODO: Print out all scheme names? + + // Simply printing out simply the pointers and lengths may not provide that much useful + // debugging information, so only print the raw args. 
+ b as *const u8, c, - d ), + SYS_MPROTECT => format!("mprotect({:#X}, {}, {:?})", b, c, MapFlags::from_bits(d)), SYS_NANOSLEEP => format!( "nanosleep({:?}, ({}, {}))", - validate_slice(b as *const TimeSpec, 1), - c, - d - ), - SYS_PHYSALLOC => format!( - "physalloc({})", - b - ), - SYS_PHYSFREE => format!( - "physfree({:#X}, {})", - b, - c - ), - SYS_PHYSMAP => format!( - "physmap({:#X}, {}, {:#X})", - b, - c, - d - ), - SYS_PHYSUNMAP => format!( - "physunmap({:#X})", - b - ), - SYS_VIRTTOPHYS => format!( - "virttophys({:#X})", - b - ), - SYS_PIPE2 => format!( - "pipe2({:?}, {})", - validate_slice_mut(b as *mut usize, 2), - c - ), - SYS_SETREGID => format!( - "setregid({}, {})", - b, - c - ), - SYS_SETRENS => format!( - "setrens({}, {})", - b, - c - ), - SYS_SETREUID => format!( - "setreuid({}, {})", - b, - c - ), - SYS_UMASK => format!( - "umask({:#o}", - b - ), - SYS_WAITPID => format!( - "waitpid({}, {:#X}, {})", - b, + unsafe { read_struct::(b) }, c, d ), SYS_YIELD => format!("yield()"), _ => format!( "UNKNOWN{} {:#X}({:#X}, {:#X}, {:#X}, {:#X}, {:#X})", - a, a, - b, - c, - d, - e, - f - ) + a, a, b, c, d, e, f + ), + } +} + +#[derive(Clone, Copy, Debug, Default)] +#[cfg(feature = "syscall_debug")] +pub struct SyscallDebugInfo { + this_switch_time: u128, + accumulated_time: u128, + do_debug: bool, +} +#[cfg(feature = "syscall_debug")] +impl SyscallDebugInfo { + pub fn on_switch_from(&mut self) { + let now = crate::time::monotonic(); + self.accumulated_time += now - core::mem::replace(&mut self.this_switch_time, now); + } + pub fn on_switch_to(&mut self) { + self.this_switch_time = crate::time::monotonic(); } } +#[cfg(feature = "syscall_debug")] +pub fn debug_start([a, b, c, d, e, f]: [usize; 6]) { + let do_debug = if false && crate::context::current().read().name.contains("init") { + if a == SYS_CLOCK_GETTIME || a == SYS_YIELD || a == SYS_FUTEX { + false + } else if (a == SYS_WRITE || a == SYS_FSYNC) && (b == 1 || b == 2) { + false + } else { + true + } + } else { + false + }; + + let debug_start = if do_debug { + let context_lock = crate::context::current(); + { + let context = context_lock.read(); + print!("{} (*{}*): ", context.name, context.debug_id,); + } + + // Do format_call outside print! so possible exception handlers cannot reentrantly + // deadlock. + let string = format_call(a, b, c, d, e, f); + println!("{}", string); + + crate::time::monotonic() + } else { + 0 + }; + + crate::percpu::PercpuBlock::current() + .syscall_debug_info + .set(SyscallDebugInfo { + accumulated_time: 0, + this_switch_time: debug_start, + do_debug, + }); +} +#[cfg(feature = "syscall_debug")] +pub fn debug_end([a, b, c, d, e, f]: [usize; 6], result: Result) { + let debug_info = crate::percpu::PercpuBlock::current() + .syscall_debug_info + .take(); + + if !debug_info.do_debug { + return; + } + let debug_duration = + debug_info.accumulated_time + (crate::time::monotonic() - debug_info.this_switch_time); + + let context_lock = crate::context::current(); + { + let context = context_lock.read(); + print!("{} (*{}*): ", context.name, context.debug_id,); + } + + // Do format_call outside print! so possible exception handlers cannot reentrantly + // deadlock. 
+ let string = format_call(a, b, c, d, e, f); + print!("{} = ", string); + + match result { + Ok(ref ok) => { + print!("Ok({} ({:#X}))", ok, ok); + } + Err(ref err) => { + print!("Err({} ({:#X}))", err, err.errno); + } + } + + println!(" in {} ns", debug_duration); +} diff --git a/src/syscall/driver.rs b/src/syscall/driver.rs deleted file mode 100644 index 1ab0a4cb..00000000 --- a/src/syscall/driver.rs +++ /dev/null @@ -1,144 +0,0 @@ -use crate::macros::InterruptStack; -use crate::memory::{allocate_frames, deallocate_frames, Frame}; -use crate::paging::{ActivePageTable, PhysicalAddress, VirtualAddress}; -use crate::paging::entry::EntryFlags; -use crate::context; -use crate::context::memory::Grant; -use crate::syscall::error::{Error, EFAULT, EINVAL, ENOMEM, EPERM, ESRCH, Result}; -use crate::syscall::flag::{PHYSMAP_WRITE, PHYSMAP_WRITE_COMBINE, PHYSMAP_NO_CACHE}; - -fn enforce_root() -> Result<()> { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - if context.euid == 0 { - Ok(()) - } else { - Err(Error::new(EPERM)) - } -} - -pub fn iopl(level: usize, stack: &mut InterruptStack) -> Result { - enforce_root()?; - - if level > 3 { - return Err(Error::new(EINVAL)); - } - - stack.iret.rflags = (stack.iret.rflags & !(3 << 12)) | ((level & 3) << 12); - - Ok(0) -} - -pub fn inner_physalloc(size: usize) -> Result { - allocate_frames((size + 4095)/4096).ok_or(Error::new(ENOMEM)).map(|frame| frame.start_address().get()) -} -pub fn physalloc(size: usize) -> Result { - enforce_root()?; - inner_physalloc(size) -} - -pub fn inner_physfree(physical_address: usize, size: usize) -> Result { - deallocate_frames(Frame::containing_address(PhysicalAddress::new(physical_address)), (size + 4095)/4096); - - //TODO: Check that no double free occured - Ok(0) -} -pub fn physfree(physical_address: usize, size: usize) -> Result { - enforce_root()?; - inner_physfree(physical_address, size) -} - -//TODO: verify exlusive access to physical memory -pub fn inner_physmap(physical_address: usize, size: usize, flags: usize) -> Result { - //TODO: Abstract with other grant creation - if size == 0 { - Ok(0) - } else { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - let mut grants = context.grants.lock(); - - let from_address = (physical_address/4096) * 4096; - let offset = physical_address - from_address; - let full_size = ((offset + size + 4095)/4096) * 4096; - let mut to_address = crate::USER_GRANT_OFFSET; - - let mut entry_flags = EntryFlags::PRESENT | EntryFlags::NO_EXECUTE | EntryFlags::USER_ACCESSIBLE; - if flags & PHYSMAP_WRITE == PHYSMAP_WRITE { - entry_flags |= EntryFlags::WRITABLE; - } - if flags & PHYSMAP_WRITE_COMBINE == PHYSMAP_WRITE_COMBINE { - entry_flags |= EntryFlags::HUGE_PAGE; - } - if flags & PHYSMAP_NO_CACHE == PHYSMAP_NO_CACHE { - entry_flags |= EntryFlags::NO_CACHE; - } - - let mut i = 0; - while i < grants.len() { - let start = grants[i].start_address().get(); - if to_address + full_size < start { - break; - } - - let pages = (grants[i].size() + 4095) / 4096; - let end = start + pages * 4096; - to_address = end; - i += 1; - } - - grants.insert(i, Grant::physmap( - PhysicalAddress::new(from_address), - VirtualAddress::new(to_address), - full_size, - entry_flags - )); - - Ok(to_address + offset) - } -} -pub fn physmap(physical_address: usize, size: usize, flags: usize) -> Result { - enforce_root()?; - 
inner_physmap(physical_address, size, flags) -} - -pub fn inner_physunmap(virtual_address: usize) -> Result { - if virtual_address == 0 { - Ok(0) - } else { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - let mut grants = context.grants.lock(); - - for i in 0 .. grants.len() { - let start = grants[i].start_address().get(); - let end = start + grants[i].size(); - if virtual_address >= start && virtual_address < end { - grants.remove(i).unmap(); - - return Ok(0); - } - } - - Err(Error::new(EFAULT)) - } -} -pub fn physunmap(virtual_address: usize) -> Result { - enforce_root()?; - inner_physunmap(virtual_address) -} - -pub fn virttophys(virtual_address: usize) -> Result { - enforce_root()?; - - let active_table = unsafe { ActivePageTable::new() }; - match active_table.translate(VirtualAddress::new(virtual_address)) { - Some(physical_address) => Ok(physical_address.get()), - None => Err(Error::new(EFAULT)) - } -} diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs index 1746d77a..7c24c820 100644 --- a/src/syscall/fs.rs +++ b/src/syscall/fs.rs @@ -1,264 +1,176 @@ //! Filesystem syscalls -use core::sync::atomic::Ordering; -use alloc::sync::Arc; -use spin::RwLock; - -use crate::context; -use crate::scheme::{self, FileHandle}; -use crate::syscall; -use crate::syscall::data::{Packet, Stat}; -use crate::syscall::error::*; -use crate::syscall::flag::{F_GETFD, F_SETFD, F_GETFL, F_SETFL, F_DUPFD, O_ACCMODE, O_DIRECTORY, O_RDONLY, O_SYMLINK, O_WRONLY, MODE_DIR, MODE_FILE, O_CLOEXEC}; -use crate::context::file::{FileDescriptor, FileDescription}; - -pub fn file_op(a: usize, fd: FileHandle, c: usize, d: usize) -> Result { - let (file, pid, uid, gid) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - let file = context.get_file(fd).ok_or(Error::new(EBADF))?; - (file, context.id, context.euid, context.egid) - }; - - let scheme = { - let schemes = scheme::schemes(); - let scheme = schemes.get(file.description.read().scheme).ok_or(Error::new(EBADF))?; - Arc::clone(&scheme) - }; - - let mut packet = Packet { - id: 0, - pid: pid.into(), - uid: uid, - gid: gid, - a: a, - b: file.description.read().number, - c: c, - d: d - }; +use core::{mem::size_of, num::NonZeroUsize}; - scheme.handle(&mut packet); - - Error::demux(packet.a) -} - -pub fn file_op_slice(a: usize, fd: FileHandle, slice: &[u8]) -> Result { - file_op(a, fd, slice.as_ptr() as usize, slice.len()) -} +use alloc::{sync::Arc, vec::Vec}; +use redox_path::RedoxPath; +use spin::RwLock; -pub fn file_op_mut_slice(a: usize, fd: FileHandle, slice: &mut [u8]) -> Result { - file_op(a, fd, slice.as_mut_ptr() as usize, slice.len()) +use crate::{ + context::{ + self, + file::{FileDescription, FileDescriptor, InternalFlags}, + memory::{AddrSpace, GenericFlusher, Grant, PageSpan, TlbShootdownActions}, + }, + paging::{Page, VirtualAddress, PAGE_SIZE}, + scheme::{self, CallerCtx, FileHandle, KernelScheme, OpenResult}, + syscall::{data::Stat, error::*, flag::*}, +}; + +use super::usercopy::{UserSlice, UserSliceRo, UserSliceRw, UserSliceWo}; + +pub fn file_op_generic( + fd: FileHandle, + op: impl FnOnce(&dyn KernelScheme, usize) -> Result, +) -> Result { + file_op_generic_ext(fd, |s, _, desc| op(s, desc.number)) } - -/// Change the current working directory -pub fn chdir(path: &[u8]) -> Result { - let fd = open(path, O_RDONLY | O_DIRECTORY)?; - let mut stat = Stat::default(); - let 
stat_res = file_op_mut_slice(syscall::number::SYS_FSTAT, fd, &mut stat); - let _ = close(fd); - stat_res?; - if stat.st_mode & (MODE_FILE | MODE_DIR) == MODE_DIR { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - let canonical = context.canonicalize(path); - *context.cwd.lock() = canonical; - Ok(0) - } else { - Err(Error::new(ENOTDIR)) - } +pub fn file_op_generic_ext( + fd: FileHandle, + op: impl FnOnce(&dyn KernelScheme, Arc>, FileDescription) -> Result, +) -> Result { + let file = context::current() + .read() + .get_file(fd) + .ok_or(Error::new(EBADF))?; + let desc = *file.description.read(); + + let scheme = scheme::schemes() + .get(desc.scheme) + .ok_or(Error::new(EBADF))? + .clone(); + + op(&*scheme, file.description, desc) } - -/// Get the current working directory -pub fn getcwd(buf: &mut [u8]) -> Result { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - let cwd = context.cwd.lock(); - let mut i = 0; - while i < buf.len() && i < cwd.len() { - buf[i] = cwd[i]; - i += 1; +pub fn copy_path_to_buf(raw_path: UserSliceRo, max_len: usize) -> Result { + let mut path_buf = vec![0_u8; max_len]; + if raw_path.len() > path_buf.len() { + return Err(Error::new(ENAMETOOLONG)); } - Ok(i) + let path_len = raw_path.copy_common_bytes_to_slice(&mut path_buf)?; + path_buf.truncate(path_len); + alloc::string::String::from_utf8(path_buf).map_err(|_| Error::new(EINVAL)) + //core::str::from_utf8(&path_buf[..path_len]).map_err(|_| Error::new(EINVAL)) } +// TODO: Define elsewhere +const PATH_MAX: usize = PAGE_SIZE; /// Open syscall -pub fn open(path: &[u8], flags: usize) -> Result { - let (mut path_canon, uid, gid, scheme_ns, umask) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - (context.canonicalize(path), context.euid, context.egid, context.ens, context.umask) +pub fn open(raw_path: UserSliceRo, flags: usize) -> Result { + let (pid, uid, gid, scheme_ns) = match context::current().read() { + ref cx => (cx.pid.into(), cx.euid, cx.egid, cx.ens), }; - let flags = (flags & (!0o777)) | (flags & 0o777) & (!(umask & 0o777)); - - //println!("open {}", unsafe { ::core::str::from_utf8_unchecked(&path_canon) }); - - for _level in 0..32 { // XXX What should the limit be? 
- //println!(" level {} = {:?}", _level, ::core::str::from_utf8(&path_canon)); - - let mut parts = path_canon.splitn(2, |&b| b == b':'); - let scheme_name_opt = parts.next(); - let reference_opt = parts.next(); - - let (scheme_id, file_id) = { - let scheme_name = scheme_name_opt.ok_or(Error::new(ENODEV))?; - let (scheme_id, scheme) = { - let schemes = scheme::schemes(); - let (scheme_id, scheme) = schemes.get_name(scheme_ns, scheme_name).ok_or(Error::new(ENODEV))?; - (scheme_id, Arc::clone(&scheme)) - }; - let reference = reference_opt.unwrap_or(b""); - let file_id = match scheme.open(reference, flags, uid, gid) { - Ok(ok) => ok, - Err(err) => if err.errno == EXDEV { - let resolve_flags = O_CLOEXEC | O_SYMLINK | O_RDONLY; - let resolve_id = scheme.open(reference, resolve_flags, uid, gid)?; - - let mut buf = [0; 4096]; - let res = scheme.read(resolve_id, &mut buf); - - let _ = scheme.close(resolve_id); - - let count = res?; - - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - path_canon = context.canonicalize(&buf[..count]); - - continue; - } else { - return Err(err); - } - }; - (scheme_id, file_id) - }; - - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - return context.add_file(FileDescriptor { - description: Arc::new(RwLock::new(FileDescription { - scheme: scheme_id, - number: file_id, - flags: flags & !O_CLOEXEC, - })), - cloexec: flags & O_CLOEXEC == O_CLOEXEC, - }).ok_or(Error::new(EMFILE)); + // TODO: BorrowedHtBuf! + + /* + let mut path_buf = BorrowedHtBuf::head()?; + let path = path_buf.use_for_string(raw_path)?; + */ + let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; + + // Display a deprecation warning for any usage of the legacy scheme syntax (scheme:/path) + // FIXME remove entries from this list as the respective programs get updated + if path_buf.contains(':') + && !path_buf.starts_with(':') + && path_buf != "null:" // FIXME Remove exception at next rustc update (rust#138457) + && path_buf != "sys:exe" // FIXME Remove exception at next rustc update (rust#138457) + && !path_buf.starts_with("orbital:") + { + let name = context::current().read().name.clone(); + if name.contains("cosmic") && (path_buf == "event:" || path_buf.starts_with("time:")) { + // FIXME cosmic apps likely need crate updates + } else { + println!("deprecated: legacy path {:?} used by {}", path_buf, name); + } } - Err(Error::new(ELOOP)) -} -pub fn pipe2(fds: &mut [usize], flags: usize) -> Result { - if fds.len() >= 2 { - let scheme_id = crate::scheme::pipe::PIPE_SCHEME_ID.load(Ordering::SeqCst); - let (read_id, write_id) = crate::scheme::pipe::pipe(flags); + let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?; + let (scheme_name, reference) = path.as_parts().ok_or(Error::new(EINVAL))?; - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - let read_fd = context.add_file(FileDescriptor { - description: Arc::new(RwLock::new(FileDescription { - scheme: scheme_id, - number: read_id, - flags: O_RDONLY | flags & !O_ACCMODE & !O_CLOEXEC, - })), - cloexec: flags & O_CLOEXEC == O_CLOEXEC, - }).ok_or(Error::new(EMFILE))?; - - let write_fd = context.add_file(FileDescriptor { - description: Arc::new(RwLock::new(FileDescription { - scheme: scheme_id, - number: write_id, - flags: O_WRONLY | flags & !O_ACCMODE & !O_CLOEXEC, - 
})), - cloexec: flags & O_CLOEXEC == O_CLOEXEC, - }).ok_or(Error::new(EMFILE))?; - - fds[0] = read_fd.into(); - fds[1] = write_fd.into(); - - Ok(0) - } else { - Err(Error::new(EFAULT)) - } -} + let description = { + let (scheme_id, scheme) = { + let schemes = scheme::schemes(); + let (scheme_id, scheme) = schemes + .get_name(scheme_ns, scheme_name.as_ref()) + .ok_or(Error::new(ENODEV))?; + (scheme_id, scheme.clone()) + }; -/// chmod syscall -pub fn chmod(path: &[u8], mode: u16) -> Result { - let (path_canon, uid, gid, scheme_ns) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - (context.canonicalize(path), context.euid, context.egid, context.ens) + match scheme.kopen(reference.as_ref(), flags, CallerCtx { uid, gid, pid })? { + OpenResult::SchemeLocal(number, internal_flags) => { + Arc::new(RwLock::new(FileDescription { + scheme: scheme_id, + number, + offset: 0, + flags: (flags & !O_CLOEXEC) as u32, + internal_flags, + })) + } + OpenResult::External(desc) => desc, + } }; + //drop(path_buf); - let mut parts = path_canon.splitn(2, |&b| b == b':'); - let scheme_name_opt = parts.next(); - let reference_opt = parts.next(); - - let scheme_name = scheme_name_opt.ok_or(Error::new(ENODEV))?; - let scheme = { - let schemes = scheme::schemes(); - let (_scheme_id, scheme) = schemes.get_name(scheme_ns, scheme_name).ok_or(Error::new(ENODEV))?; - Arc::clone(&scheme) - }; - scheme.chmod(reference_opt.unwrap_or(b""), mode, uid, gid) + context::current() + .read() + .add_file(FileDescriptor { + description, + cloexec: flags & O_CLOEXEC == O_CLOEXEC, + }) + .ok_or(Error::new(EMFILE)) } /// rmdir syscall -pub fn rmdir(path: &[u8]) -> Result { - let (path_canon, uid, gid, scheme_ns) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - (context.canonicalize(path), context.euid, context.egid, context.ens) +pub fn rmdir(raw_path: UserSliceRo) -> Result<()> { + let (scheme_ns, caller_ctx) = match context::current().read() { + ref cx => (cx.ens, cx.caller_ctx()), }; - let mut parts = path_canon.splitn(2, |&b| b == b':'); - let scheme_name_opt = parts.next(); - let reference_opt = parts.next(); + /* + let mut path_buf = BorrowedHtBuf::head()?; + let path = path_buf.use_for_string(raw_path)?; + */ + let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; + let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?; + let (scheme_name, reference) = path.as_parts().ok_or(Error::new(EINVAL))?; - let scheme_name = scheme_name_opt.ok_or(Error::new(ENODEV))?; let scheme = { let schemes = scheme::schemes(); - let (_scheme_id, scheme) = schemes.get_name(scheme_ns, scheme_name).ok_or(Error::new(ENODEV))?; - Arc::clone(&scheme) + let (_scheme_id, scheme) = schemes + .get_name(scheme_ns, scheme_name.as_ref()) + .ok_or(Error::new(ENODEV))?; + scheme.clone() }; - scheme.rmdir(reference_opt.unwrap_or(b""), uid, gid) + scheme.rmdir(reference.as_ref(), caller_ctx) } /// Unlink syscall -pub fn unlink(path: &[u8]) -> Result { - let (path_canon, uid, gid, scheme_ns) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - (context.canonicalize(path), context.euid, context.egid, context.ens) +pub fn unlink(raw_path: UserSliceRo) -> Result<()> { + let (scheme_ns, caller_ctx) = match context::current().read() { + ref cx => (cx.ens, 
cx.caller_ctx()), }; + /* + let mut path_buf = BorrowedHtBuf::head()?; + let path = path_buf.use_for_string(raw_path)?; + */ + let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; + let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?; + let (scheme_name, reference) = path.as_parts().ok_or(Error::new(EINVAL))?; - let mut parts = path_canon.splitn(2, |&b| b == b':'); - let scheme_name_opt = parts.next(); - let reference_opt = parts.next(); - - let scheme_name = scheme_name_opt.ok_or(Error::new(ENODEV))?; let scheme = { let schemes = scheme::schemes(); - let (_scheme_id, scheme) = schemes.get_name(scheme_ns, scheme_name).ok_or(Error::new(ENODEV))?; - Arc::clone(&scheme) + let (_scheme_id, scheme) = schemes + .get_name(scheme_ns, scheme_name.as_ref()) + .ok_or(Error::new(ENODEV))?; + scheme.clone() }; - scheme.unlink(reference_opt.unwrap_or(b""), uid, gid) + scheme.unlink(reference.as_ref(), caller_ctx) } /// Close syscall -pub fn close(fd: FileHandle) -> Result { +pub fn close(fd: FileHandle) -> Result<()> { let file = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; + let context_lock = context::current(); let context = context_lock.read(); context.remove_file(fd).ok_or(Error::new(EBADF))? }; @@ -266,15 +178,14 @@ pub fn close(fd: FileHandle) -> Result { file.close() } -fn duplicate_file(fd: FileHandle, buf: &[u8]) -> Result { - let file = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - context.get_file(fd).ok_or(Error::new(EBADF))? - }; +fn duplicate_file(fd: FileHandle, user_buf: UserSliceRo) -> Result { + let caller_ctx = context::current().read().caller_ctx(); + let file = context::current() + .read() + .get_file(fd) + .ok_or(Error::new(EBADF))?; - if buf.is_empty() { + if user_buf.is_empty() { Ok(FileDescriptor { description: Arc::clone(&file.description), cloexec: false, @@ -282,95 +193,180 @@ fn duplicate_file(fd: FileHandle, buf: &[u8]) -> Result { } else { let description = file.description.read(); - let new_id = { - let scheme = { - let schemes = scheme::schemes(); - let scheme = schemes.get(description.scheme).ok_or(Error::new(EBADF))?; - Arc::clone(&scheme) - }; - scheme.dup(description.number, buf)? + let new_description = { + let scheme = scheme::schemes() + .get(description.scheme) + .ok_or(Error::new(EBADF))? + .clone(); + + match scheme.kdup(description.number, user_buf, caller_ctx)? 
{ + OpenResult::SchemeLocal(number, internal_flags) => { + Arc::new(RwLock::new(FileDescription { + offset: 0, + internal_flags, + scheme: description.scheme, + number, + flags: description.flags, + })) + } + OpenResult::External(desc) => desc, + } }; Ok(FileDescriptor { - description: Arc::new(RwLock::new(FileDescription { - scheme: description.scheme, - number: new_id, - flags: description.flags, - })), + description: new_description, cloexec: false, }) } } /// Duplicate file descriptor -pub fn dup(fd: FileHandle, buf: &[u8]) -> Result { +pub fn dup(fd: FileHandle, buf: UserSliceRo) -> Result { let new_file = duplicate_file(fd, buf)?; - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - context.add_file(new_file).ok_or(Error::new(EMFILE)) + context::current() + .read() + .add_file(new_file) + .ok_or(Error::new(EMFILE)) } /// Duplicate file descriptor, replacing another -pub fn dup2(fd: FileHandle, new_fd: FileHandle, buf: &[u8]) -> Result { +pub fn dup2(fd: FileHandle, new_fd: FileHandle, buf: UserSliceRo) -> Result { if fd == new_fd { Ok(new_fd) } else { let _ = close(new_fd); let new_file = duplicate_file(fd, buf)?; - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); + let context_ref = context::current(); + let context = context_ref.read(); - context.insert_file(new_fd, new_file).ok_or(Error::new(EMFILE)) + context + .insert_file(new_fd, new_file) + .ok_or(Error::new(EMFILE)) } } +pub fn call( + fd: FileHandle, + payload: UserSliceRw, + flags: CallFlags, + metadata: UserSliceRo, +) -> Result { + let mut meta = [0_u64; 3]; + + // TODO: bytemuck/plain + let copied = metadata.copy_common_bytes_to_slice(unsafe { + core::slice::from_raw_parts_mut(meta.as_mut_ptr().cast(), meta.len() * 8) + })?; + + let file = (match ( + context::current().read(), + flags.contains(CallFlags::CONSUME), + ) { + (ctxt, true) => ctxt.remove_file(fd), + (ctxt, false) => ctxt.get_file(fd), + }) + .ok_or(Error::new(EBADF))?; + + let (scheme_id, number) = { + let desc = file.description.read(); + (desc.scheme, desc.number) + }; + let scheme = scheme::schemes() + .get(scheme_id) + .ok_or(Error::new(EBADFD))? + .clone(); + + scheme.kcall(number, payload, flags, &meta[..copied / 8]) +} + +pub fn sendfd(socket: FileHandle, fd: FileHandle, flags_raw: usize, arg: u64) -> Result { + let requested_flags = SendFdFlags::from_bits(flags_raw).ok_or(Error::new(EINVAL))?; + + let (scheme, number, desc_to_send) = { + let current_lock = context::current(); + let current = current_lock.read(); + + // TODO: Ensure deadlocks can't happen + + let (scheme, number) = match current + .get_file(socket) + .ok_or(Error::new(EBADF))? + .description + .read() + { + ref desc => (desc.scheme, desc.number), + }; + let scheme = scheme::schemes() + .get(scheme) + .ok_or(Error::new(ENODEV))? + .clone(); + + ( + scheme, + number, + current + .remove_file(fd) + .ok_or(Error::new(EBADF))? + .description, + ) + }; + + // Inform the scheme whether there are still references to the file description to be sent, + // either in the current file table or in other file tables, regardless of whether EXCLUSIVE is + // requested. 
+ + let flags_to_scheme = if Arc::strong_count(&desc_to_send) == 1 { + SendFdFlags::EXCLUSIVE + } else { + if requested_flags.contains(SendFdFlags::EXCLUSIVE) { + return Err(Error::new(EBUSY)); + } + SendFdFlags::empty() + }; + + scheme.ksendfd(number, desc_to_send, flags_to_scheme, arg) +} /// File descriptor controls pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize) -> Result { - let file = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - context.get_file(fd).ok_or(Error::new(EBADF))? - }; + let file = context::current() + .read() + .get_file(fd) + .ok_or(Error::new(EBADF))?; let description = file.description.read(); + if cmd == F_DUPFD { + // Not in match because 'files' cannot be locked + let new_file = duplicate_file(fd, UserSlice::empty())?; + + let context_lock = context::current(); + let context = context_lock.read(); + + return context + .add_file_min(new_file, arg) + .ok_or(Error::new(EMFILE)) + .map(FileHandle::into); + } + // Communicate fcntl with scheme - if cmd != F_DUPFD && cmd != F_GETFD && cmd != F_SETFD { - let scheme = { - let schemes = scheme::schemes(); - let scheme = schemes.get(description.scheme).ok_or(Error::new(EBADF))?; - Arc::clone(&scheme) - }; + if cmd != F_GETFD && cmd != F_SETFD { + let scheme = scheme::schemes() + .get(description.scheme) + .ok_or(Error::new(EBADF))? + .clone(); + scheme.fcntl(description.number, cmd, arg)?; }; // Perform kernel operation if scheme agrees { - if cmd == F_DUPFD { - // Not in match because 'files' cannot be locked - let new_file = duplicate_file(fd, &[])?; - - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - return context.add_file_min(new_file, arg) - .ok_or(Error::new(EMFILE)) - .map(FileHandle::into); - } - - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; + let context_lock = context::current(); let context = context_lock.read(); - let mut files = context.files.lock(); - match *files.get_mut(fd.into()).ok_or(Error::new(EBADF))? { + let mut files = context.files.write(); + match *files.get_mut(fd.get()).ok_or(Error::new(EBADF))? { Some(ref mut file) => match cmd { F_GETFD => { if file.cloexec { @@ -378,108 +374,305 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize) -> Result { } else { Ok(0) } - }, + } F_SETFD => { file.cloexec = arg & O_CLOEXEC == O_CLOEXEC; Ok(0) - }, - F_GETFL => { - Ok(description.flags) - }, + } + F_GETFL => Ok(description.flags as usize), F_SETFL => { - let new_flags = (description.flags & O_ACCMODE) | (arg & ! 
O_ACCMODE); + let new_flags = + (description.flags & O_ACCMODE as u32) | (arg as u32 & !O_ACCMODE as u32); drop(description); file.description.write().flags = new_flags; Ok(0) - }, - _ => { - Err(Error::new(EINVAL)) } + _ => Err(Error::new(EINVAL)), }, - None => Err(Error::new(EBADF)) + None => Err(Error::new(EBADF)), } } } -pub fn frename(fd: FileHandle, path: &[u8]) -> Result { - let file = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - let file = context.get_file(fd).ok_or(Error::new(EBADF))?; - file +pub fn flink(fd: FileHandle, raw_path: UserSliceRo) -> Result<()> { + let (caller_ctx, scheme_ns) = match context::current().read() { + ref cx => (cx.caller_ctx(), cx.ens), }; + let file = context::current() + .read() + .get_file(fd) + .ok_or(Error::new(EBADF))?; + + /* + let mut path_buf = BorrowedHtBuf::head()?; + let path = path_buf.use_for_string(raw_path)?; + */ + let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; + let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?; + let (scheme_name, reference) = path.as_parts().ok_or(Error::new(EINVAL))?; - let (path_canon, uid, gid, scheme_ns) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - (context.canonicalize(path), context.euid, context.egid, context.ens) + let (scheme_id, scheme) = { + let schemes = scheme::schemes(); + let (scheme_id, scheme) = schemes + .get_name(scheme_ns, scheme_name.as_ref()) + .ok_or(Error::new(ENODEV))?; + (scheme_id, scheme.clone()) }; - let mut parts = path_canon.splitn(2, |&b| b == b':'); - let scheme_name_opt = parts.next(); - let reference_opt = parts.next(); + let description = file.description.read(); + + if scheme_id != description.scheme { + return Err(Error::new(EXDEV)); + } + + scheme.flink(description.number, reference.as_ref(), caller_ctx) +} + +pub fn frename(fd: FileHandle, raw_path: UserSliceRo) -> Result<()> { + let (caller_ctx, scheme_ns) = match context::current().read() { + ref cx => (cx.caller_ctx(), cx.ens), + }; + let file = context::current() + .read() + .get_file(fd) + .ok_or(Error::new(EBADF))?; + + /* + let mut path_buf = BorrowedHtBuf::head()?; + let path = path_buf.use_for_string(raw_path)?; + */ + let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; + let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?; + let (scheme_name, reference) = path.as_parts().ok_or(Error::new(EINVAL))?; - let scheme_name = scheme_name_opt.ok_or(Error::new(ENODEV))?; let (scheme_id, scheme) = { let schemes = scheme::schemes(); - let (scheme_id, scheme) = schemes.get_name(scheme_ns, scheme_name).ok_or(Error::new(ENODEV))?; + let (scheme_id, scheme) = schemes + .get_name(scheme_ns, scheme_name.as_ref()) + .ok_or(Error::new(ENODEV))?; (scheme_id, scheme.clone()) }; let description = file.description.read(); - if scheme_id == description.scheme { - scheme.frename(description.number, reference_opt.unwrap_or(b""), uid, gid) - } else { - Err(Error::new(EXDEV)) + if scheme_id != description.scheme { + return Err(Error::new(EXDEV)); } + + scheme.frename(description.number, reference.as_ref(), caller_ctx) +} + +/// File status +pub fn fstat(fd: FileHandle, user_buf: UserSliceWo) -> Result<()> { + file_op_generic_ext(fd, |scheme, _, desc| { + scheme.kfstat(desc.number, user_buf)?; + + // TODO: Ensure only the kernel can access the stat when st_dev is set, or use another API + // 
for retrieving the scheme ID from a file descriptor. + // TODO: Less hacky method. + let st_dev = desc + .scheme + .get() + .try_into() + .map_err(|_| Error::new(EOVERFLOW))?; + user_buf + .advance(core::mem::offset_of!(Stat, st_dev)) + .and_then(|b| b.limit(8)) + .ok_or(Error::new(EIO))? + .copy_from_slice(&u64::to_ne_bytes(st_dev))?; + + Ok(()) + }) } -pub fn funmap(virtual_address: usize) -> Result { - if virtual_address == 0 { - Ok(0) +pub fn funmap(virtual_address: usize, length: usize) -> Result { + // Partial lengths in funmap are allowed according to POSIX, but not particularly meaningful; + // since the memory needs to SIGSEGV if later read, the entire page needs to disappear. + // + // Thus, while (temporarily) allowing unaligned lengths for compatibility, aligning the length + // should be done by libc. + + let length_aligned = length.next_multiple_of(PAGE_SIZE); + if length != length_aligned { + log::warn!( + "funmap passed length {:#x} instead of {:#x}", + length, + length_aligned + ); + } + + let addr_space = Arc::clone(context::current().read().addr_space()?); + let span = PageSpan::validate_nonempty(VirtualAddress::new(virtual_address), length_aligned) + .ok_or(Error::new(EINVAL))?; + let unpin = false; + let notify = addr_space.munmap(span, unpin)?; + + for map in notify { + let _ = map.unmap(); + } + + Ok(0) +} + +pub fn mremap( + old_address: usize, + old_size: usize, + new_address: usize, + new_size: usize, + flags: usize, +) -> Result { + if old_address % PAGE_SIZE != 0 + || old_size % PAGE_SIZE != 0 + || new_address % PAGE_SIZE != 0 + || new_size % PAGE_SIZE != 0 + { + return Err(Error::new(EINVAL)); + } + if old_size == 0 || new_size == 0 { + return Err(Error::new(EINVAL)); + } + + let old_base = Page::containing_address(VirtualAddress::new(old_address)); + let new_base = Page::containing_address(VirtualAddress::new(new_address)); + + let mremap_flags = MremapFlags::from_bits_truncate(flags); + let prot_flags = MapFlags::from_bits_truncate(flags) + & (MapFlags::PROT_READ | MapFlags::PROT_WRITE | MapFlags::PROT_EXEC); + + let map_flags = if mremap_flags.contains(MremapFlags::FIXED_REPLACE) { + MapFlags::MAP_FIXED + } else if mremap_flags.contains(MremapFlags::FIXED) { + MapFlags::MAP_FIXED_NOREPLACE } else { - let mut desc_opt = None; + MapFlags::empty() + } | prot_flags; + + let addr_space = AddrSpace::current()?; + let src_span = PageSpan::new(old_base, old_size.div_ceil(PAGE_SIZE)); + let new_page_count = new_size.div_ceil(PAGE_SIZE); + let requested_dst_base = Some(new_base).filter(|_| new_address != 0); + + if mremap_flags.contains(MremapFlags::KEEP_OLD) { + // TODO: This is a hack! Find a better interface for replacing this, perhaps a capability + // for non-CoW-borrowed i.e. owned frames, that can be inserted into address spaces. + if new_page_count != 1 { + return Err(Error::new(EOPNOTSUPP)); + } - { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - let mut grants = context.grants.lock(); - - for i in 0 .. 
grants.len() { - let start = grants[i].start_address().get(); - let end = start + grants[i].size(); - if virtual_address >= start && virtual_address < end { - let mut grant = grants.remove(i); - desc_opt = grant.desc_opt.take(); - grant.unmap(); - break; + let raii_frame = addr_space.borrow_frame_enforce_rw_allocated(src_span.base)?; + + let base = addr_space.acquire_write().mmap( + &addr_space, + requested_dst_base, + NonZeroUsize::new(1).unwrap(), + map_flags, + &mut Vec::new(), + |page, page_flags, mapper, flusher| { + let frame = raii_frame.take(); + // XXX: add_ref(RefKind::Shared) is internally done by borrow_frame_enforce_rw_allocated(src_span.base). + // The page does not get unref-ed as we call take() on the `raii_frame`. + unsafe { + mapper + .map_phys(page.start_address(), frame.base(), page_flags) + .ok_or(Error::new(ENOMEM))? + .ignore(); + + flusher.queue(frame, None, TlbShootdownActions::NEW_MAPPING); } - } - } - if let Some(desc) = desc_opt { - let scheme_id = { - let description = desc.description.read(); - description.scheme - }; + Ok(Grant::allocated_one_page_nomap(page, page_flags)) + }, + )?; - let scheme = { - let schemes = scheme::schemes(); - let scheme = schemes.get(scheme_id).ok_or(Error::new(EBADF))?; - scheme.clone() - }; - let res = scheme.funmap(virtual_address); + Ok(base.start_address().data()) + } else { + let base = addr_space.r#move( + None, + src_span, + requested_dst_base, + new_page_count, + map_flags, + &mut Vec::new(), + )?; + + Ok(base.start_address().data()) + } +} - let _ = desc.close(); +pub fn lseek(fd: FileHandle, pos: i64, whence: usize) -> Result { + enum Ret { + Legacy(usize), + Fsize((Option, Arc>)), + } + let fsize_or_legacy = file_op_generic_ext(fd, |scheme, desc_arc, desc| { + Ok( + if let Some(new_off) = scheme.legacy_seek(desc.number, pos as isize, whence) { + Ret::Legacy(new_off?) 
+ } else if whence == SEEK_END { + Ret::Fsize((Some(scheme.fsize(desc.number)?), desc_arc)) + } else { + Ret::Fsize((None, desc_arc)) + }, + ) + })?; + let (fsize, desc) = match fsize_or_legacy { + Ret::Fsize(fsize) => fsize, + Ret::Legacy(new_pos) => return Ok(new_pos), + }; - res + let mut guard = desc.write(); + + let new_pos = match whence { + SEEK_SET => pos, + SEEK_CUR => pos + .checked_add_unsigned(guard.offset) + .ok_or(Error::new(EOVERFLOW))?, + SEEK_END => pos + .checked_add_unsigned(fsize.unwrap()) + .ok_or(Error::new(EOVERFLOW))?, + _ => return Err(Error::new(EINVAL)), + }; + guard.offset = new_pos.try_into().map_err(|_| Error::new(EINVAL))?; + + Ok(guard.offset as usize) +} +pub fn sys_read(fd: FileHandle, buf: UserSliceWo) -> Result { + let (bytes_read, desc_arc, desc) = file_op_generic_ext(fd, |scheme, desc_arc, desc| { + let offset = if desc.internal_flags.contains(InternalFlags::POSITIONED) { + desc.offset } else { - Err(Error::new(EFAULT)) + u64::MAX + }; + Ok(( + scheme.kreadoff(desc.number, buf, offset, desc.flags, desc.flags)?, + desc_arc, + desc, + )) + })?; + if desc.internal_flags.contains(InternalFlags::POSITIONED) { + match desc_arc.write().offset { + ref mut offset => *offset = offset.saturating_add(bytes_read as u64), + } + } + Ok(bytes_read) +} +pub fn sys_write(fd: FileHandle, buf: UserSliceRo) -> Result { + let (bytes_written, desc_arc, desc) = file_op_generic_ext(fd, |scheme, desc_arc, desc| { + let offset = if desc.internal_flags.contains(InternalFlags::POSITIONED) { + desc.offset + } else { + u64::MAX + }; + Ok(( + scheme.kwriteoff(desc.number, buf, offset, desc.flags, desc.flags)?, + desc_arc, + desc, + )) + })?; + if desc.internal_flags.contains(InternalFlags::POSITIONED) { + match desc_arc.write().offset { + ref mut offset => *offset = offset.saturating_add(bytes_written as u64), } } + Ok(bytes_written) } diff --git a/src/syscall/futex.rs b/src/syscall/futex.rs index 95da2532..2d053d46 100644 --- a/src/syscall/futex.rs +++ b/src/syscall/futex.rs @@ -2,145 +2,199 @@ //! Futex or Fast Userspace Mutex is "a method for waiting until a certain condition becomes true." //! //! 
For more information about futexes, please read [this](https://eli.thegreenplace.net/2018/basics-of-futexes/) blog post, and the [futex(2)](http://man7.org/linux/man-pages/man2/futex.2.html) man page -use alloc::sync::Arc; -use alloc::collections::VecDeque; -use core::intrinsics; -use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; - -use crate::context::{self, Context}; -use crate::time; -use crate::syscall::data::TimeSpec; -use crate::syscall::error::{Error, Result, ESRCH, EAGAIN, EINVAL}; -use crate::syscall::flag::{FUTEX_WAIT, FUTEX_WAKE, FUTEX_REQUEUE}; -use crate::syscall::validate::{validate_slice, validate_slice_mut}; - -type FutexList = VecDeque<(usize, Arc>)>; - -/// Fast userspace mutex list -static FUTEXES: Once> = Once::new(); - -/// Initialize futexes, called if needed -fn init_futexes() -> RwLock { - RwLock::new(VecDeque::new()) +use alloc::{ + collections::VecDeque, + sync::{Arc, Weak}, +}; +use core::sync::atomic::{AtomicU32, Ordering}; +use rmm::Arch; +use spin::RwLock; +use spinning_top::RwSpinlock; +use syscall::EINTR; + +use crate::{ + context::{ + self, + memory::{AddrSpace, AddrSpaceWrapper}, + Context, + }, + memory::PhysicalAddress, + paging::{Page, VirtualAddress}, + time, +}; + +use crate::syscall::{ + data::TimeSpec, + error::{Error, Result, EAGAIN, EFAULT, EINVAL, ETIMEDOUT}, + flag::{FUTEX_WAIT, FUTEX_WAIT64, FUTEX_WAKE}, +}; + +use super::usercopy::UserSlice; + +type FutexList = VecDeque; + +pub struct FutexEntry { + // Physical address, required if synchronizing across address spaces (necessitates MAP_SHARED + // since CoW would invalidate this address). + target_physaddr: PhysicalAddress, + // Virtual address, required if synchronizing across the same address space, if the memory is + // CoW. + // TODO: FUTEX_REQUEUE + target_virtaddr: VirtualAddress, + // Context to wake up, and compare address spaces. + context_lock: Arc>, + // address space to check against if virt matches but not phys + addr_space: Weak, } -/// Get the global futexes list, const -pub fn futexes() -> RwLockReadGuard<'static, FutexList> { - FUTEXES.call_once(init_futexes).read() -} +// TODO: Process-private futexes? In that case, put the futex table in each AddrSpace, or just +// implement that fully in userspace. Although futex is probably the best API for process-shared +// POSIX synchronization primitives, a local hash table and wait-for-thread kernel APIs (e.g. +// lwp_park/lwp_unpark from NetBSD) could be a simpler replacement. +// +// TODO: Use an actual hash table. +static FUTEXES: RwLock = RwLock::new(FutexList::new()); + +fn validate_and_translate_virt(space: &AddrSpace, addr: VirtualAddress) -> Option { + // TODO: Move this elsewhere! + if addr.data().saturating_add(core::mem::size_of::()) >= crate::USER_END_OFFSET { + return None; + } + + let page = Page::containing_address(addr); + let off = addr.data() - page.start_address().data(); -/// Get the global futexes list, mutable -pub fn futexes_mut() -> RwLockWriteGuard<'static, FutexList> { - FUTEXES.call_once(init_futexes).write() + let (frame, _) = space.table.utable.translate(page.start_address())?; + + Some(frame.add(off)) } -pub fn futex(addr: &mut i32, op: usize, val: i32, val2: usize, addr2: *mut i32) -> Result { +pub fn futex(addr: usize, op: usize, val: usize, val2: usize, _addr2: usize) -> Result { + let current_addrsp = AddrSpace::current()?; + + // Keep the address space locked so we can safely read from the physical address. Unlock it + // before context switching. 
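// The wake-up matching rule implied by FutexEntry above (wake on a physical-frame
// match, or on a virtual-address match within the same address space) can be
// modelled in isolation. The following is an illustrative, self-contained sketch
// with stand-in types (PhysAddr, VirtAddr, SpaceId); it is not kernel code.
#[derive(Clone, Copy, PartialEq, Eq)]
struct PhysAddr(usize);
#[derive(Clone, Copy, PartialEq, Eq)]
struct VirtAddr(usize);
#[derive(Clone, Copy, PartialEq, Eq)]
struct SpaceId(usize);

struct Waiter {
    phys: PhysAddr, // key for MAP_SHARED memory, stable across address spaces
    virt: VirtAddr, // fallback key for CoW memory within one address space
    space: SpaceId, // identity of the waiter's address space
}

// Wake if the physical frame matches, or if the virtual page matches *and* the
// waker shares the waiter's address space (the CoW case, where the physical frame
// backing the page may have changed underneath the waiter).
fn wakes(waiter: &Waiter, phys: PhysAddr, virt: VirtAddr, space: SpaceId) -> bool {
    waiter.phys == phys || (waiter.virt == virt && waiter.space == space)
}

fn main() {
    let w = Waiter { phys: PhysAddr(0x1000), virt: VirtAddr(0x7000_0000), space: SpaceId(1) };
    // Same shared frame seen from another address space: woken via the physical key.
    assert!(wakes(&w, PhysAddr(0x1000), VirtAddr(0x5000_0000), SpaceId(2)));
    // CoW changed the frame, but same virtual page in the same space: still woken.
    assert!(wakes(&w, PhysAddr(0x2000), VirtAddr(0x7000_0000), SpaceId(1)));
    // Different frame and different address space: not woken.
    assert!(!wakes(&w, PhysAddr(0x2000), VirtAddr(0x7000_0000), SpaceId(2)));
}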
+ let addr_space_guard = current_addrsp.acquire_read(); + + let target_virtaddr = VirtualAddress::new(addr); + let target_physaddr = validate_and_translate_virt(&*addr_space_guard, target_virtaddr) + .ok_or(Error::new(EFAULT))?; + match op { - FUTEX_WAIT => { - let timeout_opt = if val2 != 0 { - Some(validate_slice(val2 as *const TimeSpec, 1).map(|req| &req[0])?) - } else { - None - }; + // TODO: FUTEX_WAIT_MULTIPLE? + FUTEX_WAIT | FUTEX_WAIT64 => { + let timeout_opt = UserSlice::ro(val2, core::mem::size_of::())? + .none_if_null() + .map(|buf| unsafe { buf.read_exact::() }) + .transpose()?; { - let mut futexes = futexes_mut(); + let mut futexes = FUTEXES.write(); - let context_lock = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - Arc::clone(&context_lock) - }; + let context_lock = context::current(); + + let (fetched, expected) = if op == FUTEX_WAIT { + // Must be aligned, otherwise it could cross a page boundary and mess up the + // (simpler) validation we did in the first place. + if addr % 4 != 0 { + return Err(Error::new(EINVAL)); + } - if unsafe { intrinsics::atomic_load(addr) != val } { + // On systems where virtual memory is not abundant, we might instead add an + // atomic usercopy function. + let accessible_addr = + unsafe { crate::paging::RmmA::phys_to_virt(target_physaddr) }.data(); + + ( + u64::from(unsafe { + (*(accessible_addr as *const AtomicU32)).load(Ordering::SeqCst) + }), + u64::from(val as u32), + ) + } else { + #[cfg(target_has_atomic = "64")] + { + use core::sync::atomic::AtomicU64; + + // op == FUTEX_WAIT64 + if addr % 8 != 0 { + return Err(Error::new(EINVAL)); + } + ( + u64::from(unsafe { + (*(addr as *const AtomicU64)).load(Ordering::SeqCst) + }), + val as u64, + ) + } + #[cfg(not(target_has_atomic = "64"))] + { + return Err(Error::new(crate::syscall::error::EOPNOTSUPP)); + } + }; + if fetched != expected { return Err(Error::new(EAGAIN)); } { let mut context = context_lock.write(); - if let Some(timeout) = timeout_opt { - let start = time::monotonic(); - let sum = start.1 + timeout.tv_nsec as u64; - let end = (start.0 + timeout.tv_sec as u64 + sum / 1_000_000_000, sum % 1_000_000_000); - context.wake = Some(end); + context.wake = timeout_opt.map(|TimeSpec { tv_sec, tv_nsec }| { + tv_sec as u128 * time::NANOS_PER_SEC + tv_nsec as u128 + }); + if let Some((tctl, pctl, _)) = context.sigcontrol() { + if tctl.currently_pending_unblocked(pctl) != 0 { + return Err(Error::new(EINTR)); + } } - context.block(); + context.block("futex"); } - futexes.push_back((addr as *mut i32 as usize, context_lock)); + futexes.push_back(FutexEntry { + target_physaddr, + target_virtaddr, + context_lock, + addr_space: Arc::downgrade(¤t_addrsp), + }); } - unsafe { context::switch(); } + drop(addr_space_guard); - if timeout_opt.is_some() { - let context_lock = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - Arc::clone(&context_lock) - }; + context::switch(); - { - let mut context = context_lock.write(); - context.wake = None; - } + if timeout_opt.is_some() { + context::current().write().wake = None; + Err(Error::new(ETIMEDOUT)) + } else { + Ok(0) } - - Ok(0) - }, + } FUTEX_WAKE => { let mut woken = 0; { - let mut futexes = futexes_mut(); + let mut futexes = FUTEXES.write(); let mut i = 0; - while i < futexes.len() && (woken as i32) < val { - if futexes[i].0 == addr as *mut i32 as usize { - if let Some(futex) = futexes.swap_remove_back(i) { - futex.1.write().unblock(); - 
woken += 1; - } - } else { - i += 1; - } - } - } - Ok(woken) - }, - FUTEX_REQUEUE => { - let addr2_safe = validate_slice_mut(addr2, 1).map(|addr2_safe| &mut addr2_safe[0])?; - - let mut woken = 0; - let mut requeued = 0; - - { - let mut futexes = futexes_mut(); - - let mut i = 0; - while i < futexes.len() && (woken as i32) < val { - if futexes[i].0 == addr as *mut i32 as usize { - if let Some(futex) = futexes.swap_remove_back(i) { - futex.1.write().unblock(); - woken += 1; - } - } else { + // TODO: Use something like retain, once it is possible to tell it when to stop iterating... + while i < futexes.len() && woken < val { + if futexes[i].target_physaddr != target_physaddr + && (futexes[i].target_virtaddr != target_virtaddr + || !Arc::downgrade(¤t_addrsp).ptr_eq(&futexes[i].addr_space)) + { i += 1; + continue; } - } - while i < futexes.len() && requeued < val2 { - if futexes[i].0 == addr as *mut i32 as usize { - futexes[i].0 = addr2_safe as *mut i32 as usize; - requeued += 1; - } - i += 1; + futexes[i].context_lock.write().unblock(); + futexes.swap_remove_back(i); + woken += 1; } } Ok(woken) - }, - _ => Err(Error::new(EINVAL)) + } + _ => Err(Error::new(EINVAL)), } } diff --git a/src/syscall/mod.rs b/src/syscall/mod.rs index f76f96b6..fdef2ed5 100644 --- a/src/syscall/mod.rs +++ b/src/syscall/mod.rs @@ -4,29 +4,36 @@ extern crate syscall; -pub use self::syscall::{data, error, flag, io, number, scheme}; +use core::mem::size_of; -pub use self::driver::*; -pub use self::fs::*; -pub use self::futex::futex; -pub use self::privilege::*; -pub use self::process::*; -pub use self::time::*; -pub use self::validate::*; +use syscall::{dirent::DirentHeader, CallFlags, RwFlags, EINVAL}; -use self::data::{SigAction, TimeSpec}; -use self::error::{Error, Result, ENOSYS}; -use self::number::*; +pub use self::syscall::{ + data, error, flag, io, number, ptrace_event, EnvRegisters, FloatRegisters, IntRegisters, +}; -use crate::context::ContextId; -use crate::macros::InterruptStack; -use crate::scheme::{FileHandle, SchemeNamespace}; +pub use self::{fs::*, futex::futex, privilege::*, process::*, time::*, usercopy::validate_region}; + +use self::{ + data::{Map, TimeSpec}, + error::{Error, Result, ENOSYS, EOVERFLOW}, + flag::{EventFlags, MapFlags}, + number::*, + usercopy::UserSlice, +}; + +use crate::percpu::PercpuBlock; + +use crate::{ + context::memory::AddrSpace, + scheme::{memory::MemoryScheme, FileHandle}, +}; /// Debug pub mod debug; -/// Driver syscalls -pub mod driver; +#[cfg(feature = "syscall_debug")] +use self::debug::{debug_end, debug_start}; /// Filesystem syscalls pub mod fs; @@ -43,200 +50,175 @@ pub mod process; /// Time syscalls pub mod time; -/// Validate input -pub mod validate; +/// Safely copying memory between user and kernel memory +pub mod usercopy; /// This function is the syscall handler of the kernel, it is composed of an inner function that returns a `Result`. After the inner function runs, the syscall /// function calls [`Error::mux`] on it. 
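// The dispatch pattern described in the doc comment above can be modelled in
// isolation: an inner handler returns a Result, and the outer wrapper packs it
// into the single usize return value. The mux/demux functions below are
// simplified stand-ins (assuming the convention noted later in this file that
// errors travel as the negated errno), not the actual Error::mux implementation.
const ERRNO_MAX: usize = 4096;

fn mux(result: Result<usize, i32>) -> usize {
    match result {
        Ok(value) => value,
        // e.g. EBADF = 9 becomes usize::MAX - 8
        Err(errno) => (-(errno as isize)) as usize,
    }
}

fn demux(raw: usize) -> Result<usize, i32> {
    if raw > usize::MAX - ERRNO_MAX {
        Err((-(raw as isize)) as i32)
    } else {
        Ok(raw)
    }
}

fn main() {
    // An "inner" handler returning Result, in the spirit of `inner` in the dispatcher.
    let inner = |fd: usize| -> Result<usize, i32> {
        if fd == usize::MAX {
            Err(9 /* EBADF */)
        } else {
            Ok(42)
        }
    };

    assert_eq!(demux(mux(inner(3))), Ok(42));
    assert_eq!(demux(mux(inner(usize::MAX))), Err(9));
}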
-pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: usize, stack: &mut InterruptStack) -> usize { +#[must_use] +pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) -> usize { #[inline(always)] - fn inner(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: usize, stack: &mut InterruptStack) -> Result { + fn inner(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) -> Result { + let fd = FileHandle::from(b); //SYS_* is declared in kernel/syscall/src/number.rs - match a & SYS_CLASS { - SYS_CLASS_FILE => { - let fd = FileHandle::from(b); - match a & SYS_ARG { - SYS_ARG_SLICE => file_op_slice(a, fd, validate_slice(c as *const u8, d)?), - SYS_ARG_MSLICE => file_op_mut_slice(a, fd, validate_slice_mut(c as *mut u8, d)?), - _ => match a { - SYS_CLOSE => close(fd), - SYS_DUP => dup(fd, validate_slice(c as *const u8, d)?).map(FileHandle::into), - SYS_DUP2 => dup2(fd, FileHandle::from(c), validate_slice(d as *const u8, e)?).map(FileHandle::into), - SYS_FCNTL => fcntl(fd, c, d), - SYS_FEXEC => fexec(fd, validate_slice(c as *const [usize; 2], d)?, validate_slice(e as *const [usize; 2], f)?), - SYS_FRENAME => frename(fd, validate_slice(c as *const u8, d)?), - SYS_FUNMAP => funmap(b), - _ => file_op(a, fd, c, d) - } + match a { + SYS_WRITE2 => file_op_generic_ext(fd, |scheme, _, desc| { + let flags = if f == usize::MAX { + None + } else { + Some( + u32::try_from(f) + .ok() + .and_then(RwFlags::from_bits) + .ok_or(Error::new(EINVAL))?, + ) + }; + scheme.kwriteoff( + desc.number, + UserSlice::ro(c, d)?, + e as u64, + flags.map_or(desc.flags, |f| desc.rw_flags(f)), + desc.flags, + ) + }), + SYS_WRITE => sys_write(fd, UserSlice::ro(c, d)?), + SYS_FMAP => { + let addrspace = AddrSpace::current()?; + let map = unsafe { UserSlice::ro(c, d)?.read_exact::()? }; + if b == !0 { + MemoryScheme::fmap_anonymous(&addrspace, &map, false) + } else { + file_op_generic(fd, |scheme, number| { + scheme.kfmap(number, &addrspace, &map, false) + }) } - }, - SYS_CLASS_PATH => match a { - SYS_OPEN => open(validate_slice(b as *const u8, c)?, d).map(FileHandle::into), - SYS_CHMOD => chmod(validate_slice(b as *const u8, c)?, d as u16), - SYS_RMDIR => rmdir(validate_slice(b as *const u8, c)?), - SYS_UNLINK => unlink(validate_slice(b as *const u8, c)?), - _ => Err(Error::new(ENOSYS)) - }, - _ => match a { - SYS_YIELD => sched_yield(), - SYS_NANOSLEEP => nanosleep( - validate_slice(b as *const TimeSpec, 1).map(|req| &req[0])?, - if c == 0 { - None - } else { - Some(validate_slice_mut(c as *mut TimeSpec, 1).map(|rem| &mut rem[0])?) 
- } - ), - SYS_CLOCK_GETTIME => clock_gettime(b, validate_slice_mut(c as *mut TimeSpec, 1).map(|time| &mut time[0])?), - SYS_FUTEX => futex(validate_slice_mut(b as *mut i32, 1).map(|uaddr| &mut uaddr[0])?, c, d as i32, e, f as *mut i32), - SYS_BRK => brk(b), - SYS_GETPID => getpid().map(ContextId::into), - SYS_GETPGID => getpgid(ContextId::from(b)).map(ContextId::into), - SYS_GETPPID => getppid().map(ContextId::into), - SYS_CLONE => { - let old_rsp = stack.iret.rsp; - if b & flag::CLONE_STACK == flag::CLONE_STACK { - stack.iret.rsp = c; - } - let ret = clone(b, bp).map(ContextId::into); - stack.iret.rsp = old_rsp; - ret - }, - SYS_EXIT => exit((b & 0xFF) << 8), - SYS_KILL => kill(ContextId::from(b), c), - SYS_WAITPID => waitpid(ContextId::from(b), c, d).map(ContextId::into), - SYS_CHDIR => chdir(validate_slice(b as *const u8, c)?), - SYS_IOPL => iopl(b, stack), - SYS_GETCWD => getcwd(validate_slice_mut(b as *mut u8, c)?), - SYS_GETEGID => getegid(), - SYS_GETENS => getens(), - SYS_GETEUID => geteuid(), - SYS_GETGID => getgid(), - SYS_GETNS => getns(), - SYS_GETUID => getuid(), - SYS_MPROTECT => mprotect(b, c, d), - SYS_MKNS => mkns(validate_slice(b as *const [usize; 2], c)?), - SYS_SETPGID => setpgid(ContextId::from(b), ContextId::from(c)), - SYS_SETREUID => setreuid(b as u32, c as u32), - SYS_SETRENS => setrens(SchemeNamespace::from(b), SchemeNamespace::from(c)), - SYS_SETREGID => setregid(b as u32, c as u32), - SYS_SIGACTION => sigaction( - b, - if c == 0 { - None - } else { - Some(validate_slice(c as *const SigAction, 1).map(|act| &act[0])?) - }, - if d == 0 { - None - } else { - Some(validate_slice_mut(d as *mut SigAction, 1).map(|oldact| &mut oldact[0])?) - }, - e - ), - SYS_SIGPROCMASK => sigprocmask( - b, - if c == 0 { - None - } else { - Some(validate_slice(c as *const [u64; 2], 1).map(|s| &s[0])?) - }, - if d == 0 { - None - } else { - Some(validate_slice_mut(d as *mut [u64; 2], 1).map(|s| &mut s[0])?) - } - ), - SYS_SIGRETURN => sigreturn(), - SYS_PIPE2 => pipe2(validate_slice_mut(b as *mut usize, 2)?, c), - SYS_PHYSALLOC => physalloc(b), - SYS_PHYSFREE => physfree(b, c), - SYS_PHYSMAP => physmap(b, c, d), - SYS_PHYSUNMAP => physunmap(b), - SYS_UMASK => umask(b), - SYS_VIRTTOPHYS => virttophys(b), - _ => Err(Error::new(ENOSYS)) } - } - } + SYS_GETDENTS => { + let header_size = u16::try_from(e).map_err(|_| Error::new(EINVAL))?; - /* - let debug = { - let contexts = crate::context::contexts(); - if let Some(context_lock) = contexts.current() { - let context = context_lock.read(); - let name_raw = context.name.lock(); - let name = unsafe { core::str::from_utf8_unchecked(&name_raw) }; - if name == "file:/bin/cargo" || name == "file:/bin/rustc" { - if a == SYS_CLOCK_GETTIME { - false - } else if (a == SYS_WRITE || a == SYS_FSYNC) && (b == 1 || b == 2) { - false - } else { - true + if usize::from(header_size) != size_of::() { + // TODO: allow? If so, zero_out must be implemented for UserSlice + return Err(Error::new(EINVAL)); } - } else { - false + + file_op_generic(fd, |scheme, number| { + scheme.getdents(number, UserSlice::wo(c, d)?, header_size, f as u64) + }) } - } else { - false - } - }; + SYS_FUTIMENS => file_op_generic(fd, |scheme, number| { + scheme.kfutimens(number, UserSlice::ro(c, d)?) 
+ }), - if debug { - let contexts = crate::context::contexts(); - if let Some(context_lock) = contexts.current() { - let context = context_lock.read(); - print!("{} ({}): ", unsafe { core::str::from_utf8_unchecked(&context.name.lock()) }, context.id.into()); - } + SYS_READ2 => file_op_generic_ext(fd, |scheme, _, desc| { + let flags = if f == usize::MAX { + None + } else { + Some( + u32::try_from(f) + .ok() + .and_then(RwFlags::from_bits) + .ok_or(Error::new(EINVAL))?, + ) + }; + scheme.kreadoff( + desc.number, + UserSlice::wo(c, d)?, + e as u64, + flags.map_or(desc.flags, |f| desc.rw_flags(f)), + desc.flags, + ) + }), + SYS_READ => sys_read(fd, UserSlice::wo(c, d)?), + SYS_FPATH => file_op_generic(fd, |scheme, number| { + scheme.kfpath(number, UserSlice::wo(c, d)?) + }), + SYS_FSTAT => fstat(fd, UserSlice::wo(c, d)?).map(|()| 0), + SYS_FSTATVFS => file_op_generic(fd, |scheme, number| { + scheme.kfstatvfs(number, UserSlice::wo(c, d)?).map(|()| 0) + }), + + SYS_DUP => dup(fd, UserSlice::ro(c, d)?).map(FileHandle::into), + SYS_DUP2 => dup2(fd, FileHandle::from(c), UserSlice::ro(d, e)?).map(FileHandle::into), + + #[cfg(target_pointer_width = "32")] + SYS_SENDFD => sendfd(fd, FileHandle::from(c), d, e as u64 | ((f as u64) << 32)), + + #[cfg(target_pointer_width = "64")] + SYS_SENDFD => sendfd(fd, FileHandle::from(c), d, e as u64), + + SYS_LSEEK => lseek(fd, c as i64, d), + SYS_FCHMOD => file_op_generic(fd, |scheme, number| { + scheme.fchmod(number, c as u16).map(|()| 0) + }), + SYS_FCHOWN => file_op_generic(fd, |scheme, number| { + scheme.fchown(number, c as u32, d as u32).map(|()| 0) + }), + SYS_FCNTL => fcntl(fd, c, d), + SYS_FEVENT => file_op_generic(fd, |scheme, number| { + Ok(scheme + .fevent(number, EventFlags::from_bits_truncate(c))? + .bits()) + }), + SYS_FLINK => flink(fd, UserSlice::ro(c, d)?).map(|()| 0), + SYS_FRENAME => frename(fd, UserSlice::ro(c, d)?).map(|()| 0), + SYS_FUNMAP => funmap(b, c), + + SYS_FSYNC => file_op_generic(fd, |scheme, number| scheme.fsync(number).map(|()| 0)), + // TODO: 64-bit lengths on 32-bit platforms + SYS_FTRUNCATE => { + file_op_generic(fd, |scheme, number| scheme.ftruncate(number, c).map(|()| 0)) + } - println!("{}", debug::format_call(a, b, c, d, e, f)); - } - */ - - // The next lines set the current syscall in the context struct, then once the inner() function - // completes, we set the current syscall to none. 
- // - // When the code below falls out of scope it will release the lock - // see the spin crate for details - { - let contexts = crate::context::contexts(); - if let Some(context_lock) = contexts.current() { - let mut context = context_lock.write(); - context.syscall = Some((a, b, c, d, e, f)); - } - } + SYS_CLOSE => close(fd).map(|()| 0), + SYS_CALL => call( + fd, + UserSlice::rw(c, d)?, + CallFlags::from_bits(e & !0xff).ok_or(Error::new(EINVAL))?, + UserSlice::ro(f, (e & 0xff) * 8)?, + ), + + SYS_OPEN => open(UserSlice::ro(b, c)?, d).map(FileHandle::into), + SYS_RMDIR => rmdir(UserSlice::ro(b, c)?).map(|()| 0), + SYS_UNLINK => unlink(UserSlice::ro(b, c)?).map(|()| 0), + SYS_YIELD => sched_yield().map(|()| 0), + SYS_NANOSLEEP => nanosleep( + UserSlice::ro(b, core::mem::size_of::())?, + UserSlice::wo(c, core::mem::size_of::())?.none_if_null(), + ) + .map(|()| 0), + SYS_CLOCK_GETTIME => { + clock_gettime(b, UserSlice::wo(c, core::mem::size_of::())?).map(|()| 0) + } + SYS_FUTEX => futex(b, c, d, e, f), - let result = inner(a, b, c, d, e, f, bp, stack); + SYS_MPROTECT => mprotect(b, c, MapFlags::from_bits_truncate(d)).map(|()| 0), + SYS_MKNS => mkns(UserSlice::ro( + b, + c.checked_mul(core::mem::size_of::<[usize; 2]>()) + .ok_or(Error::new(EOVERFLOW))?, + )?), + SYS_MREMAP => mremap(b, c, d, e, f), - { - let contexts = crate::context::contexts(); - if let Some(context_lock) = contexts.current() { - let mut context = context_lock.write(); - context.syscall = None; + _ => return Err(Error::new(ENOSYS)), } } - /* - if debug { - let contexts = crate::context::contexts(); - if let Some(context_lock) = contexts.current() { - let context = context_lock.read(); - print!("{} ({}): ", unsafe { core::str::from_utf8_unchecked(&context.name.lock()) }, context.id.into()); - } + PercpuBlock::current().inside_syscall.set(true); - print!("{} = ", debug::format_call(a, b, c, d, e, f)); + #[cfg(feature = "syscall_debug")] + debug_start([a, b, c, d, e, f]); - match result { - Ok(ref ok) => { - println!("Ok({} ({:#X}))", ok, ok); - }, - Err(ref err) => { - println!("Err({} ({:#X}))", err, err.errno); - } - } + let result = inner(a, b, c, d, e, f); + + #[cfg(feature = "syscall_debug")] + debug_end([a, b, c, d, e, f], result); + + let percpu = PercpuBlock::current(); + percpu.inside_syscall.set(false); + + if percpu.switch_internals.being_sigkilled.get() { + exit_this_context(None); } - */ // errormux turns Result into -errno Error::mux(result) diff --git a/src/syscall/privilege.rs b/src/syscall/privilege.rs index 2ad14d53..7b8fed4b 100644 --- a/src/syscall/privilege.rs +++ b/src/syscall/privilege.rs @@ -1,237 +1,41 @@ use alloc::vec::Vec; -use crate::context; -use crate::scheme::{self, SchemeNamespace}; -use crate::syscall::error::*; -use crate::syscall::validate::validate_slice; +use crate::{context, scheme, syscall::error::*}; -pub fn getegid() -> Result { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - Ok(context.egid as usize) -} - -pub fn getens() -> Result { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - Ok(context.ens.into()) -} - -pub fn geteuid() -> Result { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - Ok(context.euid as usize) -} - -pub fn getgid() -> Result { - let contexts = context::contexts(); - let context_lock 
= contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - Ok(context.rgid as usize) -} - -pub fn getns() -> Result { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - Ok(context.rns.into()) -} - -pub fn getuid() -> Result { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - Ok(context.ruid as usize) -} - -pub fn mkns(name_ptrs: &[[usize; 2]]) -> Result { - let mut names = Vec::new(); - for name_ptr in name_ptrs { - names.push(validate_slice(name_ptr[0] as *const u8, name_ptr[1])?); - } +use super::{ + copy_path_to_buf, + usercopy::{UserSlice, UserSliceRo}, +}; - let (uid, from) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - (context.euid, context.ens) +pub fn mkns(mut user_buf: UserSliceRo) -> Result { + let (uid, from) = match context::current().read() { + ref cx => (cx.euid, cx.ens), }; - if uid == 0 { - let to = scheme::schemes_mut().make_ns(from, &names)?; - Ok(to.into()) - } else { - Err(Error::new(EACCES)) - } -} - -pub fn setregid(rgid: u32, egid: u32) -> Result { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); - - let setrgid = - if context.euid == 0 { - // Allow changing RGID if root - true - } else if rgid == context.egid { - // Allow changing RGID if used for EGID - true - } else if rgid == context.rgid { - // Allow changing RGID if used for RGID - true - } else if rgid as i32 == -1 { - // Ignore RGID if -1 is passed - false - } else { - // Not permitted otherwise - return Err(Error::new(EPERM)); - }; - - let setegid = - if context.euid == 0 { - // Allow changing EGID if root - true - } else if egid == context.egid { - // Allow changing EGID if used for EGID - true - } else if egid == context.rgid { - // Allow changing EGID if used for RGID - true - } else if egid as i32 == -1 { - // Ignore EGID if -1 is passed - false - } else { - // Not permitted otherwise - return Err(Error::new(EPERM)); - }; - - if setrgid { - context.rgid = rgid; - } - - if setegid { - context.egid = egid; - } - - Ok(0) -} - -pub fn setrens(rns: SchemeNamespace, ens: SchemeNamespace) -> Result { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); - - let setrns = - if rns.into() == 0 { - // Allow entering capability mode - true - } else if context.rns.into() == 0 { - // Do not allow leaving capability mode - return Err(Error::new(EPERM)); - } else if context.euid == 0 { - // Allow setting RNS if root - true - } else if rns == context.ens { - // Allow setting RNS if used for ENS - true - } else if rns == context.rns { - // Allow setting RNS if used for RNS - true - } else if rns.into() as isize == -1 { - // Ignore RNS if -1 is passed - false - } else { - // Not permitted otherwise - return Err(Error::new(EPERM)); - }; - - let setens = - if ens.into() == 0 { - // Allow entering capability mode - true - } else if context.ens.into() == 0 { - // Do not allow leaving capability mode - return Err(Error::new(EPERM)); - } else if context.euid == 0 { - // Allow setting ENS if root - true - } else if ens == context.ens { - // Allow setting ENS if used for ENS - true - } else if ens == context.rns { - // Allow setting 
ENS if used for RNS - true - } else if ens.into() as isize == -1 { - // Ignore ENS if -1 is passed - false - } else { - // Not permitted otherwise - return Err(Error::new(EPERM)); - }; - - if setrns { - context.rns = rns; + // TODO: Lift this restriction later? + if uid != 0 { + return Err(Error::new(EACCES)); } - if setens { - context.ens = ens; - } - - Ok(0) -} + let mut names = Vec::with_capacity(user_buf.len() / core::mem::size_of::<[usize; 2]>()); -pub fn setreuid(ruid: u32, euid: u32) -> Result { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); + while let Some((current_name_ptr_buf, next_part)) = + user_buf.split_at(core::mem::size_of::<[usize; 2]>()) + { + let mut iter = current_name_ptr_buf.usizes(); + let ptr = iter.next().ok_or(Error::new(EINVAL))??; + let len = iter.next().ok_or(Error::new(EINVAL))??; - let setruid = - if context.euid == 0 { - // Allow setting RUID if root - true - } else if ruid == context.euid { - // Allow setting RUID if used for EUID - true - } else if ruid == context.ruid { - // Allow setting RUID if used for RUID - true - } else if ruid as i32 == -1 { - // Ignore RUID if -1 is passed - false - } else { - // Not permitted otherwise - return Err(Error::new(EPERM)); - }; + let raw_path = UserSlice::new(ptr, len)?; - let seteuid = - if context.euid == 0 { - // Allow setting EUID if root - true - } else if euid == context.euid { - // Allow setting EUID if used for EUID - true - } else if euid == context.ruid { - // Allow setting EUID if used for RUID - true - } else if euid as i32 == -1 { - // Ignore EUID if -1 is passed - false - } else { - // Not permitted otherwise - return Err(Error::new(EPERM)); - }; + // TODO: Max scheme size limit? 
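// The loop above consumes a packed array of [ptr, len] pairs, one per scheme
// name. A self-contained illustration of how a caller would lay out that buffer
// follows; the names are examples only and no syscall is performed here.
fn main() {
    let names: [&str; 2] = ["file", "tcp"];

    // One [ptr, len] entry per name; this is what the kernel-side iterator
    // (usizes()) reads back as `ptr` followed by `len`.
    let entries: Vec<[usize; 2]> = names
        .iter()
        .map(|s| [s.as_ptr() as usize, s.len()])
        .collect();

    // The dispatcher is then handed the entry array's address and its element
    // count, which it multiplies by size_of::<[usize; 2]>() to size the slice.
    assert_eq!(entries.len(), names.len());
    assert_eq!(entries[1][1], "tcp".len());
}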
+ let max_len = 256; - if setruid { - context.ruid = ruid; - } + names.push(copy_path_to_buf(raw_path, max_len)?.into_boxed_str()); - if seteuid { - context.euid = euid; + user_buf = next_part; } - Ok(0) + let to = scheme::schemes_mut().make_ns(from, names)?; + Ok(to.into()) } diff --git a/src/syscall/process.rs b/src/syscall/process.rs index e683b4cf..5f4777ed 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -1,1624 +1,148 @@ -use alloc::sync::Arc; -use alloc::boxed::Box; -use alloc::vec::Vec; -use core::alloc::{GlobalAlloc, Layout}; -use core::{intrinsics, mem}; -use core::ops::DerefMut; -use spin::Mutex; - -use crate::context::file::FileDescriptor; -use crate::context::{ContextId, WaitpidKey}; -use crate::context; -#[cfg(not(feature="doc"))] -use crate::elf::{self, program_header}; -use crate::interrupt; -use crate::ipi::{ipi, IpiKind, IpiTarget}; -use crate::memory::allocate_frames; -use crate::paging::entry::EntryFlags; -use crate::paging::mapper::MapperFlushAll; -use crate::paging::temporary_page::TemporaryPage; -use crate::paging::{ActivePageTable, InactivePageTable, Page, VirtualAddress, PAGE_SIZE}; -use crate::ptrace; -use crate::scheme::FileHandle; -use crate::start::usermode; -use crate::syscall::data::{PtraceEvent, PtraceEventData, SigAction, Stat}; -use crate::syscall::error::*; -use crate::syscall::flag::{CLONE_VFORK, CLONE_VM, CLONE_FS, CLONE_FILES, CLONE_SIGHAND, CLONE_STACK, - PROT_EXEC, PROT_READ, PROT_WRITE, PTRACE_EVENT_CLONE, - SIG_DFL, SIG_BLOCK, SIG_UNBLOCK, SIG_SETMASK, SIGCONT, SIGTERM, - WCONTINUED, WNOHANG, WUNTRACED, wifcontinued, wifstopped}; -use crate::syscall::validate::{validate_slice, validate_slice_mut}; -use crate::syscall; - -pub fn brk(address: usize) -> Result { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - //println!("{}: {}: BRK {:X}", unsafe { ::core::str::from_utf8_unchecked(&context.name.lock()) }, - // context.id.into(), address); - - let current = if let Some(ref heap_shared) = context.heap { - heap_shared.with(|heap| { - heap.start_address().get() + heap.size() - }) - } else { - panic!("user heap not initialized"); - }; - - if address == 0 { - //println!("Brk query {:X}", current); - Ok(current) - } else if address >= crate::USER_HEAP_OFFSET { - //TODO: out of memory errors - if let Some(ref heap_shared) = context.heap { - heap_shared.with(|heap| { - heap.resize(address - crate::USER_HEAP_OFFSET, true); - }); - } else { - panic!("user heap not initialized"); - } - - //println!("Brk resize {:X}", address); - Ok(address) - } else { - //println!("Brk no mem"); - Err(Error::new(ENOMEM)) - } -} - -pub fn clone(flags: usize, stack_base: usize) -> Result { - let ppid; - let pid; - { - let pgid; - let ruid; - let rgid; - let rns; - let euid; - let egid; - let ens; - let umask; - let sigmask; - let mut cpu_id = None; - let arch; - let vfork; - let mut kfx_option = None; - let mut kstack_option = None; - let mut offset = 0; - let mut image = vec![]; - let mut heap_option = None; - let mut stack_option = None; - let mut sigstack_option = None; - let mut tls_option = None; - let grants; - let name; - let cwd; - let files; - let actions; - - // Copy from old process - { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - ppid = context.id; - pgid = context.pgid; - ruid = context.ruid; - rgid = context.rgid; - rns = context.rns; - euid = 
context.euid; - egid = context.egid; - ens = context.ens; - sigmask = context.sigmask; - umask = context.umask; - - if flags & CLONE_VM == CLONE_VM { - cpu_id = context.cpu_id; - } - - arch = context.arch.clone(); - - if let Some(ref fx) = context.kfx { - let mut new_fx = unsafe { Box::from_raw(crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(512, 16)) as *mut [u8; 512]) }; - for (new_b, b) in new_fx.iter_mut().zip(fx.iter()) { - *new_b = *b; - } - kfx_option = Some(new_fx); - } - - if let Some(ref stack) = context.kstack { - // Get the relative offset to the return address of this function - // (base pointer - start of stack) - one - offset = stack_base - stack.as_ptr() as usize - mem::size_of::(); // Add clone ret - let mut new_stack = stack.clone(); - - unsafe { - if let Some(regs) = ptrace::rebase_regs_ptr_mut(context.regs, Some(&mut new_stack)) { - // We'll need to tell the clone that it should - // return 0, but that's it. We don't actually - // clone the registers, because it will then - // become None and be exempt from all kinds of - // ptracing until the current syscall has - // completed. - (*regs).scratch.rax = 0; - } - - // Change the return address of the child - // (previously syscall) to the arch-specific - // clone_ret callback - let func_ptr = new_stack.as_mut_ptr().offset(offset as isize); - *(func_ptr as *mut usize) = interrupt::syscall::clone_ret as usize; - } - - kstack_option = Some(new_stack); - } - - if flags & CLONE_VM == CLONE_VM { - for memory_shared in context.image.iter() { - image.push(memory_shared.clone()); - } - - if let Some(ref heap_shared) = context.heap { - heap_option = Some(heap_shared.clone()); - } - } else { - for memory_shared in context.image.iter() { - memory_shared.with(|memory| { - let mut new_memory = context::memory::Memory::new( - VirtualAddress::new(memory.start_address().get() + crate::USER_TMP_OFFSET), - memory.size(), - EntryFlags::PRESENT | EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE, - false - ); - - unsafe { - intrinsics::copy(memory.start_address().get() as *const u8, - new_memory.start_address().get() as *mut u8, - memory.size()); - } - - new_memory.remap(memory.flags()); - image.push(new_memory.to_shared()); - }); - } - - if let Some(ref heap_shared) = context.heap { - heap_shared.with(|heap| { - let mut new_heap = context::memory::Memory::new( - VirtualAddress::new(crate::USER_TMP_HEAP_OFFSET), - heap.size(), - EntryFlags::PRESENT | EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE, - false - ); - - unsafe { - intrinsics::copy(heap.start_address().get() as *const u8, - new_heap.start_address().get() as *mut u8, - heap.size()); - } - - new_heap.remap(heap.flags()); - heap_option = Some(new_heap.to_shared()); - }); - } - } - - if let Some(ref stack_shared) = context.stack { - if flags & CLONE_STACK == CLONE_STACK { - stack_option = Some(stack_shared.clone()); - } else { - stack_shared.with(|stack| { - let mut new_stack = context::memory::Memory::new( - VirtualAddress::new(crate::USER_TMP_STACK_OFFSET), - stack.size(), - EntryFlags::PRESENT | EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE, - false - ); - - unsafe { - intrinsics::copy(stack.start_address().get() as *const u8, - new_stack.start_address().get() as *mut u8, - stack.size()); - } - - new_stack.remap(stack.flags()); - stack_option = Some(new_stack.to_shared()); - }); - } - } - - if let Some(ref sigstack) = context.sigstack { - let mut new_sigstack = context::memory::Memory::new( - VirtualAddress::new(crate::USER_TMP_SIGSTACK_OFFSET), - sigstack.size(), - 
EntryFlags::PRESENT | EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE, - false - ); - - unsafe { - intrinsics::copy(sigstack.start_address().get() as *const u8, - new_sigstack.start_address().get() as *mut u8, - sigstack.size()); - } - - new_sigstack.remap(sigstack.flags()); - sigstack_option = Some(new_sigstack); - } - - if let Some(ref tls) = context.tls { - let mut new_tls = context::memory::Tls { - master: tls.master, - file_size: tls.file_size, - mem: context::memory::Memory::new( - VirtualAddress::new(crate::USER_TMP_TLS_OFFSET), - tls.mem.size(), - EntryFlags::PRESENT | EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE, - true - ), - offset: tls.offset, - }; - - - if flags & CLONE_VM == CLONE_VM { - unsafe { - new_tls.load(); - } - } else { - unsafe { - intrinsics::copy(tls.mem.start_address().get() as *const u8, - new_tls.mem.start_address().get() as *mut u8, - tls.mem.size()); - } - } - - new_tls.mem.remap(tls.mem.flags()); - tls_option = Some(new_tls); - } - - if flags & CLONE_VM == CLONE_VM { - grants = Arc::clone(&context.grants); - } else { - let mut grants_vec = Vec::new(); - for grant in context.grants.lock().iter() { - let start = VirtualAddress::new(grant.start_address().get() + crate::USER_TMP_GRANT_OFFSET - crate::USER_GRANT_OFFSET); - grants_vec.push(grant.secret_clone(start)); - } - grants = Arc::new(Mutex::new(grants_vec)); - } - - if flags & CLONE_VM == CLONE_VM { - name = Arc::clone(&context.name); - } else { - name = Arc::new(Mutex::new(context.name.lock().clone())); - } - - if flags & CLONE_FS == CLONE_FS { - cwd = Arc::clone(&context.cwd); - } else { - cwd = Arc::new(Mutex::new(context.cwd.lock().clone())); - } - - if flags & CLONE_FILES == CLONE_FILES { - files = Arc::clone(&context.files); - } else { - files = Arc::new(Mutex::new(context.files.lock().clone())); - } - - if flags & CLONE_SIGHAND == CLONE_SIGHAND { - actions = Arc::clone(&context.actions); - } else { - actions = Arc::new(Mutex::new(context.actions.lock().clone())); - } - } - - // If not cloning files, dup to get a new number from scheme - // This has to be done outside the context lock to prevent deadlocks - if flags & CLONE_FILES == 0 { - for (_fd, file_option) in files.lock().iter_mut().enumerate() { - let new_file_option = if let Some(ref file) = *file_option { - Some(FileDescriptor { - description: Arc::clone(&file.description), - cloexec: file.cloexec, - }) - } else { - None - }; - - *file_option = new_file_option; - } - } - - // If not cloning virtual memory, use fmap to re-obtain every grant where possible - if flags & CLONE_VM == 0 { - let mut i = 0; - while i < grants.lock().len() { - let remove = false; - if let Some(grant) = grants.lock().get(i) { - if let Some(ref _desc) = grant.desc_opt { - println!("todo: clone grant {} using fmap: {:?}", i, grant); - } - } - if remove { - grants.lock().remove(i); - } else { - i += 1; - } - } - } - - // If vfork, block the current process - // This has to be done after the operations that may require context switches - if flags & CLONE_VFORK == CLONE_VFORK { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); - context.block(); - vfork = true; - } else { - vfork = false; - } - - // Set up new process - { - let mut contexts = context::contexts_mut(); - let context_lock = contexts.new_context()?; - let mut context = context_lock.write(); - - pid = context.id; - - context.pgid = pgid; - context.ppid = ppid; - context.ruid = ruid; - context.rgid = rgid; - context.rns 
= rns; - context.euid = euid; - context.egid = egid; - context.ens = ens; - context.sigmask = sigmask; - context.umask = umask; - - context.cpu_id = cpu_id; - - context.status = context::Status::Runnable; - - context.vfork = vfork; - - context.arch = arch; - - let mut active_table = unsafe { ActivePageTable::new() }; - - let mut temporary_page = TemporaryPage::new(Page::containing_address(VirtualAddress::new(crate::USER_TMP_MISC_OFFSET))); - - let mut new_table = { - let frame = allocate_frames(1).expect("no more frames in syscall::clone new_table"); - InactivePageTable::new(frame, &mut active_table, &mut temporary_page) - }; - - context.arch.set_page_table(unsafe { new_table.address() }); - - // Copy kernel image mapping - { - let frame = active_table.p4()[crate::KERNEL_PML4].pointed_frame().expect("kernel image not mapped"); - let flags = active_table.p4()[crate::KERNEL_PML4].flags(); - active_table.with(&mut new_table, &mut temporary_page, |mapper| { - mapper.p4_mut()[crate::KERNEL_PML4].set(frame, flags); - }); - } - - // Copy kernel heap mapping - { - let frame = active_table.p4()[crate::KERNEL_HEAP_PML4].pointed_frame().expect("kernel heap not mapped"); - let flags = active_table.p4()[crate::KERNEL_HEAP_PML4].flags(); - active_table.with(&mut new_table, &mut temporary_page, |mapper| { - mapper.p4_mut()[crate::KERNEL_HEAP_PML4].set(frame, flags); - }); - } - - if let Some(fx) = kfx_option.take() { - context.arch.set_fx(fx.as_ptr() as usize); - context.kfx = Some(fx); - } - - // Set kernel stack - if let Some(stack) = kstack_option.take() { - context.arch.set_stack(stack.as_ptr() as usize + offset); - context.kstack = Some(stack); - } - - // TODO: Clone ksig? - - // Setup image, heap, and grants - if flags & CLONE_VM == CLONE_VM { - // Copy user image mapping, if found - if ! image.is_empty() { - let frame = active_table.p4()[crate::USER_PML4].pointed_frame().expect("user image not mapped"); - let flags = active_table.p4()[crate::USER_PML4].flags(); - active_table.with(&mut new_table, &mut temporary_page, |mapper| { - mapper.p4_mut()[crate::USER_PML4].set(frame, flags); - }); - } - context.image = image; - - // Copy user heap mapping, if found - if let Some(heap_shared) = heap_option { - let frame = active_table.p4()[crate::USER_HEAP_PML4].pointed_frame().expect("user heap not mapped"); - let flags = active_table.p4()[crate::USER_HEAP_PML4].flags(); - active_table.with(&mut new_table, &mut temporary_page, |mapper| { - mapper.p4_mut()[crate::USER_HEAP_PML4].set(frame, flags); - }); - context.heap = Some(heap_shared); - } - - // Copy grant mapping - if ! 
grants.lock().is_empty() { - let frame = active_table.p4()[crate::USER_GRANT_PML4].pointed_frame().expect("user grants not mapped"); - let flags = active_table.p4()[crate::USER_GRANT_PML4].flags(); - active_table.with(&mut new_table, &mut temporary_page, |mapper| { - mapper.p4_mut()[crate::USER_GRANT_PML4].set(frame, flags); - }); - } - context.grants = grants; - } else { - // Copy percpu mapping - for cpu_id in 0..crate::cpu_count() { - extern { - // The starting byte of the thread data segment - static mut __tdata_start: u8; - // The ending byte of the thread BSS segment - static mut __tbss_end: u8; - } - - let size = unsafe { & __tbss_end as *const _ as usize - & __tdata_start as *const _ as usize }; - - let start = crate::KERNEL_PERCPU_OFFSET + crate::KERNEL_PERCPU_SIZE * cpu_id; - let end = start + size; - - let start_page = Page::containing_address(VirtualAddress::new(start)); - let end_page = Page::containing_address(VirtualAddress::new(end - 1)); - for page in Page::range_inclusive(start_page, end_page) { - let frame = active_table.translate_page(page).expect("kernel percpu not mapped"); - active_table.with(&mut new_table, &mut temporary_page, |mapper| { - let result = mapper.map_to(page, frame, EntryFlags::PRESENT | EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE); - // Ignore result due to operating on inactive table - unsafe { result.ignore(); } - }); - } - } - - // Move copy of image - for memory_shared in image.iter_mut() { - memory_shared.with(|memory| { - let start = VirtualAddress::new(memory.start_address().get() - crate::USER_TMP_OFFSET + crate::USER_OFFSET); - memory.move_to(start, &mut new_table, &mut temporary_page); - }); - } - context.image = image; - - // Move copy of heap - if let Some(heap_shared) = heap_option { - heap_shared.with(|heap| { - heap.move_to(VirtualAddress::new(crate::USER_HEAP_OFFSET), &mut new_table, &mut temporary_page); - }); - context.heap = Some(heap_shared); - } - - // Move grants - for grant in grants.lock().iter_mut() { - let start = VirtualAddress::new(grant.start_address().get() + crate::USER_GRANT_OFFSET - crate::USER_TMP_GRANT_OFFSET); - grant.move_to(start, &mut new_table, &mut temporary_page); - } - context.grants = grants; - } - - // Setup user stack - if let Some(stack_shared) = stack_option { - if flags & CLONE_STACK == CLONE_STACK { - let frame = active_table.p4()[crate::USER_STACK_PML4].pointed_frame().expect("user stack not mapped"); - let flags = active_table.p4()[crate::USER_STACK_PML4].flags(); - active_table.with(&mut new_table, &mut temporary_page, |mapper| { - mapper.p4_mut()[crate::USER_STACK_PML4].set(frame, flags); - }); - } else { - stack_shared.with(|stack| { - stack.move_to(VirtualAddress::new(crate::USER_STACK_OFFSET), &mut new_table, &mut temporary_page); - }); - } - context.stack = Some(stack_shared); - } - - // Setup user sigstack - if let Some(mut sigstack) = sigstack_option { - sigstack.move_to(VirtualAddress::new(crate::USER_SIGSTACK_OFFSET), &mut new_table, &mut temporary_page); - context.sigstack = Some(sigstack); - } - - // Set up TCB - let tcb_addr = crate::USER_TCB_OFFSET + context.id.into() * PAGE_SIZE; - let mut tcb = context::memory::Memory::new( - VirtualAddress::new(tcb_addr), - PAGE_SIZE, - EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE | EntryFlags::USER_ACCESSIBLE, - true - ); - - // Setup user TLS - if let Some(mut tls) = tls_option { - // Copy TLS mapping - { - let frame = active_table.p4()[crate::USER_TLS_PML4].pointed_frame().expect("user tls not mapped"); - let flags = 
active_table.p4()[crate::USER_TLS_PML4].flags(); - active_table.with(&mut new_table, &mut temporary_page, |mapper| { - mapper.p4_mut()[crate::USER_TLS_PML4].set(frame, flags); - }); - } - - // TODO: Make sure size is not greater than USER_TLS_SIZE - let tls_addr = crate::USER_TLS_OFFSET + context.id.into() * crate::USER_TLS_SIZE; - //println!("{}: Copy TLS: address 0x{:x}, size 0x{:x}", context.id.into(), tls_addr, tls.mem.size()); - tls.mem.move_to(VirtualAddress::new(tls_addr), &mut new_table, &mut temporary_page); - unsafe { - *(tcb_addr as *mut usize) = tls.mem.start_address().get() + tls.mem.size(); - } - context.tls = Some(tls); - } else { - //println!("{}: Copy TCB", context.id.into()); - let parent_tcb_addr = crate::USER_TCB_OFFSET + ppid.into() * PAGE_SIZE; - unsafe { - intrinsics::copy(parent_tcb_addr as *const u8, - tcb_addr as *mut u8, - tcb.size()); - } - } - - tcb.move_to(VirtualAddress::new(tcb_addr), &mut new_table, &mut temporary_page); - context.image.push(tcb.to_shared()); - - context.name = name; - - context.cwd = cwd; - - context.files = files; - - context.actions = actions; - } - } - - let ptrace_event = PtraceEvent { - tag: PTRACE_EVENT_CLONE, - data: PtraceEventData { - clone: pid.into() - } - }; - - if ptrace::send_event(ptrace_event).is_some() { - // Freeze the clone, allow ptrace to put breakpoints - // to it before it starts - let contexts = context::contexts(); - let context = contexts.get(pid).expect("Newly created context doesn't exist??"); - let mut context = context.write(); - context.ptrace_stop = true; - } - - // Race to pick up the new process! - ipi(IpiKind::Switch, IpiTarget::Other); - - let _ = unsafe { context::switch() }; - - Ok(pid) -} - -fn empty(context: &mut context::Context, reaping: bool) { - if reaping { - // Memory should already be unmapped - assert!(context.image.is_empty()); - assert!(context.heap.is_none()); - assert!(context.stack.is_none()); - assert!(context.sigstack.is_none()); - assert!(context.tls.is_none()); - } else { - // Unmap previous image, heap, grants, stack, and tls - context.image.clear(); - drop(context.heap.take()); - drop(context.stack.take()); - drop(context.sigstack.take()); - drop(context.tls.take()); - } - - let mut grants = context.grants.lock(); - if Arc::strong_count(&context.grants) == 1 { - for grant in grants.drain(..) { - if reaping { - println!("{}: {}: Grant should not exist: {:?}", context.id.into(), unsafe { ::core::str::from_utf8_unchecked(&context.name.lock()) }, grant); - - let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_table()) }; - let mut temporary_page = TemporaryPage::new(Page::containing_address(VirtualAddress::new(crate::USER_TMP_GRANT_OFFSET))); - - grant.unmap_inactive(&mut new_table, &mut temporary_page); - } else { - grant.unmap(); - } - } - } -} - -struct ExecFile(FileHandle); - -impl Drop for ExecFile { - fn drop(&mut self) { - let _ = syscall::close(self.0); - } -} - -fn fexec_noreturn( - setuid: Option, - setgid: Option, - name: Box<[u8]>, - data: Box<[u8]>, - args: Box<[Box<[u8]>]>, - vars: Box<[Box<[u8]>]> -) -> ! 
{ - let entry; - let mut sp = crate::USER_STACK_OFFSET + crate::USER_STACK_SIZE - 256; - +use alloc::{sync::Arc, vec::Vec}; +use core::{mem, num::NonZeroUsize}; + +use rmm::Arch; +use spin::RwLock; + +use crate::{ + context::{ + memory::{AddrSpace, Grant, PageSpan}, + ContextRef, + }, + event, + scheme::GlobalSchemes, + syscall::EventFlags, +}; + +use crate::{ + context, + paging::{Page, VirtualAddress, PAGE_SIZE}, + syscall::{error::*, flag::MapFlags}, + Bootstrap, CurrentRmmArch, +}; + +use super::usercopy::UserSliceWo; + +pub fn exit_this_context(excp: Option) -> ! { + let close_files; + let addrspace_opt; + + let context_lock = context::current(); { - let (vfork, ppid, files) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH)).expect("exec_noreturn pid not found"); - let mut context = context_lock.write(); - - context.name = Arc::new(Mutex::new(name)); - - empty(&mut context, false); - - if let Some(uid) = setuid { - context.euid = uid; - } - - if let Some(gid) = setgid { - context.egid = gid; - } - - // Map and copy new segments - let mut tls_option = None; - { - let elf = elf::Elf::from(&data).unwrap(); - entry = elf.entry(); - - // Always map TCB - let tcb_addr = crate::USER_TCB_OFFSET + context.id.into() * PAGE_SIZE; - let tcb_mem = context::memory::Memory::new( - VirtualAddress::new(tcb_addr), - PAGE_SIZE, - EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE | EntryFlags::USER_ACCESSIBLE, - true - ); - - for segment in elf.segments() { - match segment.p_type { - program_header::PT_LOAD => { - let voff = segment.p_vaddr as usize % PAGE_SIZE; - let vaddr = segment.p_vaddr as usize - voff; - - let mut memory = context::memory::Memory::new( - VirtualAddress::new(vaddr), - segment.p_memsz as usize + voff, - EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE, - true - ); - - unsafe { - // Copy file data - intrinsics::copy((elf.data.as_ptr() as usize + segment.p_offset as usize) as *const u8, - segment.p_vaddr as *mut u8, - segment.p_filesz as usize); - } - - let mut flags = EntryFlags::NO_EXECUTE | EntryFlags::USER_ACCESSIBLE; - - if segment.p_flags & program_header::PF_R == program_header::PF_R { - flags.insert(EntryFlags::PRESENT); - } - - // W ^ X. 
If it is executable, do not allow it to be writable, even if requested - if segment.p_flags & program_header::PF_X == program_header::PF_X { - flags.remove(EntryFlags::NO_EXECUTE); - } else if segment.p_flags & program_header::PF_W == program_header::PF_W { - flags.insert(EntryFlags::WRITABLE); - } - - memory.remap(flags); - - context.image.push(memory.to_shared()); - }, - program_header::PT_TLS => { - let aligned_size = if segment.p_align > 0 { - ((segment.p_memsz + (segment.p_align - 1))/segment.p_align) * segment.p_align - } else { - segment.p_memsz - } as usize; - let rounded_size = ((aligned_size + PAGE_SIZE - 1)/PAGE_SIZE) * PAGE_SIZE; - let rounded_offset = rounded_size - aligned_size; - - // TODO: Make sure size is not greater than USER_TLS_SIZE - let tls_addr = crate::USER_TLS_OFFSET + context.id.into() * crate::USER_TLS_SIZE; - let tls = context::memory::Tls { - master: VirtualAddress::new(segment.p_vaddr as usize), - file_size: segment.p_filesz as usize, - mem: context::memory::Memory::new( - VirtualAddress::new(tls_addr), - rounded_size as usize, - EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE | EntryFlags::USER_ACCESSIBLE, - true - ), - offset: rounded_offset as usize, - }; - - unsafe { - *(tcb_addr as *mut usize) = tls.mem.start_address().get() + tls.mem.size(); - } - - tls_option = Some(tls); - }, - _ => (), - } - } - - context.image.push(tcb_mem.to_shared()); - } - - // Data no longer required, can deallocate - drop(data); - - // Map heap - context.heap = Some(context::memory::Memory::new( - VirtualAddress::new(crate::USER_HEAP_OFFSET), - 0, - EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE | EntryFlags::USER_ACCESSIBLE, - true - ).to_shared()); - - // Map stack - context.stack = Some(context::memory::Memory::new( - VirtualAddress::new(crate::USER_STACK_OFFSET), - crate::USER_STACK_SIZE, - EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE | EntryFlags::USER_ACCESSIBLE, - true - ).to_shared()); - - // Map stack - context.sigstack = Some(context::memory::Memory::new( - VirtualAddress::new(crate::USER_SIGSTACK_OFFSET), - crate::USER_SIGSTACK_SIZE, - EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE | EntryFlags::USER_ACCESSIBLE, - true - )); - - // Map TLS - if let Some(mut tls) = tls_option { - unsafe { - tls.load(); - } - - context.tls = Some(tls); - } - - let mut arg_size = 0; - - // Push arguments and variables - for iter in &[&vars, &args] { - // Push null-terminator - sp -= mem::size_of::(); - unsafe { *(sp as *mut usize) = 0; } - - // Push content - for arg in iter.iter().rev() { - sp -= mem::size_of::(); - unsafe { *(sp as *mut usize) = crate::USER_ARG_OFFSET + arg_size; } - - arg_size += arg.len() + 1; - } - } - - // Push arguments length - sp -= mem::size_of::(); - unsafe { *(sp as *mut usize) = args.len(); } - - if arg_size > 0 { - let mut memory = context::memory::Memory::new( - VirtualAddress::new(crate::USER_ARG_OFFSET), - arg_size, - EntryFlags::NO_EXECUTE | EntryFlags::WRITABLE, - true - ); - - let mut arg_offset = 0; - for arg in vars.iter().rev().chain(args.iter().rev()) { - unsafe { - intrinsics::copy(arg.as_ptr(), - (crate::USER_ARG_OFFSET + arg_offset) as *mut u8, - arg.len()); - } - arg_offset += arg.len(); - - unsafe { - *((crate::USER_ARG_OFFSET + arg_offset) as *mut u8) = 0; - } - arg_offset += 1; - } - - memory.remap(EntryFlags::NO_EXECUTE | EntryFlags::USER_ACCESSIBLE); - - context.image.push(memory.to_shared()); - } - - // Args no longer required, can deallocate - drop(args); - - context.actions = Arc::new(Mutex::new(vec![( - SigAction { - sa_handler: unsafe 
{ mem::transmute(SIG_DFL) }, - sa_mask: [0; 2], - sa_flags: 0, - }, - 0 - ); 128])); - - let vfork = context.vfork; - context.vfork = false; - - let files = Arc::clone(&context.files); - - (vfork, context.ppid, files) - }; - - for (_fd, file_option) in files.lock().iter_mut().enumerate() { - let mut cloexec = false; - if let Some(ref file) = *file_option { - if file.cloexec { - cloexec = true; - } - } - - if cloexec { - let _ = file_option.take().unwrap().close(); - } - } - - if vfork { - let contexts = context::contexts(); - if let Some(context_lock) = contexts.get(ppid) { - let mut context = context_lock.write(); - if ! context.unblock() { - println!("{} not blocked for exec vfork unblock", ppid.into()); - } - } else { - println!("{} not found for exec vfork unblock", ppid.into()); - } - } - } - - // Go to usermode - unsafe { usermode(entry, sp, 0); } -} - -pub fn fexec_kernel(fd: FileHandle, args: Box<[Box<[u8]>]>, vars: Box<[Box<[u8]>]>, name_override_opt: Option>) -> Result { - let (uid, gid) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - (context.euid, context.egid) - }; - - let mut stat: Stat; - let mut name: Vec; - let mut data: Vec; - { - let file = ExecFile(fd); - - stat = Stat::default(); - syscall::file_op_mut_slice(syscall::number::SYS_FSTAT, file.0, &mut stat)?; - - let mut perm = stat.st_mode & 0o7; - if stat.st_uid == uid { - perm |= (stat.st_mode >> 6) & 0o7; - } - if stat.st_gid == gid { - perm |= (stat.st_mode >> 3) & 0o7; - } - if uid == 0 { - perm |= 0o7; - } - - if perm & 0o1 != 0o1 { - return Err(Error::new(EACCES)); - } - - if let Some(name_override) = name_override_opt { - name = Vec::from(name_override); - } else { - name = vec![0; 4096]; - let len = syscall::file_op_mut_slice(syscall::number::SYS_FPATH, file.0, &mut name)?; - name.truncate(len); - } - - //TODO: Only read elf header, not entire file. Then read required segments - data = vec![0; stat.st_size as usize]; - syscall::file_op_mut_slice(syscall::number::SYS_READ, file.0, &mut data)?; - drop(file); - } - - // Set UID and GID are determined after resolving any hashbangs - let setuid = if stat.st_mode & syscall::flag::MODE_SETUID == syscall::flag::MODE_SETUID { - Some(stat.st_uid) - } else { - None - }; - - let setgid = if stat.st_mode & syscall::flag::MODE_SETGID == syscall::flag::MODE_SETGID { - Some(stat.st_gid) - } else { - None + let mut context = context_lock.write(); + close_files = Arc::try_unwrap(mem::take(&mut context.files)) + .map_or_else(|_| Vec::new(), RwLock::into_inner); + addrspace_opt = context + .set_addr_space(None) + .and_then(|a| Arc::try_unwrap(a).ok()); + drop(context.syscall_head.take()); + drop(context.syscall_tail.take()); + } + + // Files must be closed while context is valid so that messages can be passed + for file_opt in close_files.into_iter() { + if let Some(file) = file_opt { + let _ = file.close(); + } + } + drop(addrspace_opt); + // TODO: Should status == Status::HardBlocked be handled differently? 
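// A minimal, self-contained sketch of the `mem::take` + `Arc::try_unwrap` idiom used in
// the new exit path above: the file table is drained for closing only when this context
// holds the last reference to it, otherwise it is left to the remaining sharers.
// std types stand in for the kernel's spin locks and file handles; every name below is
// an illustrative assumption, not a kernel item.
use std::mem;
use std::sync::{Arc, Mutex};

type FileTable = Vec<Option<String>>;

struct Context {
    files: Arc<Mutex<FileTable>>,
}

fn take_files_if_last(context: &mut Context) -> FileTable {
    // `mem::take` swaps in a fresh, empty table so the context stays structurally valid,
    // and `Arc::try_unwrap` only succeeds when no other context shares the old table.
    Arc::try_unwrap(mem::take(&mut context.files))
        .map_or_else(|_still_shared| Vec::new(), |lock| lock.into_inner().unwrap())
}

fn main() {
    let mut ctx = Context {
        files: Arc::new(Mutex::new(vec![Some("debug:".to_string()), None])),
    };
    for file in take_files_if_last(&mut ctx).into_iter().flatten() {
        println!("closing {file}"); // stand-in for `file.close()`
    }
}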
+ let owner = { + let mut guard = context_lock.write(); + guard.status = context::Status::Dead { excp }; + guard.owner_proc_id }; - - // The argument list is limited to avoid using too much userspace stack - // This check is done last to allow all hashbangs to be resolved - // - // This should be based on the size of the userspace stack, divided - // by the cost of each argument, which should be usize * 2, with - // one additional argument added to represent the total size of the - // argument pointer array and potential padding - // - // A limit of 4095 would mean a stack of (4095 + 1) * 8 * 2 = 65536, or 64KB - if (args.len() + vars.len()) > 4095 { - return Err(Error::new(E2BIG)); - } - - match elf::Elf::from(&data) { - Ok(elf) => { - // We check the validity of all loadable sections here - for segment in elf.segments() { - match segment.p_type { - program_header::PT_INTERP => { - //TODO: length restraint, parse interp earlier - let mut interp = vec![0; segment.p_memsz as usize]; - unsafe { - intrinsics::copy((elf.data.as_ptr() as usize + segment.p_offset as usize) as *const u8, - interp.as_mut_ptr(), - segment.p_filesz as usize); - } - - let mut i = 0; - while i < interp.len() { - if interp[i] == 0 { - break; - } - i += 1; - } - interp.truncate(i); - - println!(" interpreter: {:?}", ::core::str::from_utf8(&interp)); - - let interp_fd = super::fs::open(&interp, super::flag::O_RDONLY | super::flag::O_CLOEXEC)?; - - let mut args_vec = Vec::from(args); - args_vec.insert(0, interp.into_boxed_slice()); - //TODO: pass file handle in auxv - let name_override = name.into_boxed_slice(); - args_vec[1] = name_override.clone(); - - return fexec_kernel( - interp_fd, - args_vec.into_boxed_slice(), - vars, - Some(name_override), - ); - }, - program_header::PT_LOAD => { - let voff = segment.p_vaddr as usize % PAGE_SIZE; - let vaddr = segment.p_vaddr as usize - voff; - - // Due to the Userspace and kernel TLS bases being located right above 2GB, - // limit any loadable sections to lower than that. Eventually we will need - // to replace this with a more intelligent TLS address - if vaddr >= 0x8000_0000 { - println!("exec: invalid section address {:X}", segment.p_vaddr); - return Err(Error::new(ENOEXEC)); - } - }, - _ => (), - } - } - }, - Err(err) => { - println!("fexec: failed to execute {}: {}", fd.into(), err); - return Err(Error::new(ENOEXEC)); - } - } - - // This is the point of no return, quite literaly. Any checks for validity need - // to be done before, and appropriate errors returned. Otherwise, we have nothing - // to return to. - fexec_noreturn(setuid, setgid, name.into_boxed_slice(), data.into_boxed_slice(), args, vars); -} - -pub fn fexec(fd: FileHandle, arg_ptrs: &[[usize; 2]], var_ptrs: &[[usize; 2]]) -> Result { - let mut args = Vec::new(); - for arg_ptr in arg_ptrs { - let arg = validate_slice(arg_ptr[0] as *const u8, arg_ptr[1])?; - // Argument must be moved into kernel space before exec unmaps all memory - args.push(arg.to_vec().into_boxed_slice()); - } - drop(arg_ptrs); - - let mut vars = Vec::new(); - for var_ptr in var_ptrs { - let var = validate_slice(var_ptr[0] as *const u8, var_ptr[1])?; - // Argument must be moved into kernel space before exec unmaps all memory - vars.push(var.to_vec().into_boxed_slice()); - } - drop(var_ptrs); - - fexec_kernel(fd, args.into_boxed_slice(), vars.into_boxed_slice(), None) -} - -pub fn exit(status: usize) -> ! 
{ - { - let context_lock = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH)).expect("exit failed to find context"); - Arc::clone(&context_lock) - }; - - let mut close_files = Vec::new(); - let pid = { - let mut context = context_lock.write(); - { - let mut lock = context.files.lock(); - if Arc::strong_count(&context.files) == 1 { - mem::swap(lock.deref_mut(), &mut close_files); - } - } - context.files = Arc::new(Mutex::new(Vec::new())); - context.id - }; - - // Files must be closed while context is valid so that messages can be passed - for (_fd, file_option) in close_files.drain(..).enumerate() { - if let Some(file) = file_option { - let _ = file.close(); - } - } - - // PGID and PPID must be grabbed after close, as context switches could change PGID or PPID if parent exits - let (pgid, ppid) = { - let context = context_lock.read(); - (context.pgid, context.ppid) - }; - - // Transfer child processes to parent - { - let contexts = context::contexts(); - for (_id, context_lock) in contexts.iter() { - let mut context = context_lock.write(); - if context.ppid == pid { - context.ppid = ppid; - context.vfork = false; - } - } - } - - let (vfork, children) = { - let mut context = context_lock.write(); - - empty(&mut context, false); - - let vfork = context.vfork; - context.vfork = false; - - context.status = context::Status::Exited(status); - - let children = context.waitpid.receive_all(); - - (vfork, children) - }; - - { - let contexts = context::contexts(); - if let Some(parent_lock) = contexts.get(ppid) { - let waitpid = { - let mut parent = parent_lock.write(); - if vfork { - if ! parent.unblock() { - println!("{}: {} not blocked for exit vfork unblock", pid.into(), ppid.into()); - } - } - Arc::clone(&parent.waitpid) - }; - - for (c_pid, c_status) in children { - waitpid.send(c_pid, c_status); - } - - waitpid.send(WaitpidKey { - pid: Some(pid), - pgid: Some(pgid) - }, (pid, status)); - } else { - println!("{}: {} not found for exit vfork unblock", pid.into(), ppid.into()); - } - } - - // Alert any tracers waiting for process (important: AFTER sending waitpid event) - ptrace::close_tracee(pid); - - if pid == ContextId::from(1) { - println!("Main kernel thread exited with status {:X}", status); - - extern { - fn kreset() -> !; - fn kstop() -> !; - } - - if status == SIGTERM { - unsafe { kreset(); } - } else { - unsafe { kstop(); } - } - } - } - - let _ = unsafe { context::switch() }; - + if let Some(owner) = owner { + let _ = event::trigger( + GlobalSchemes::Proc.scheme_id(), + owner.get(), + EventFlags::EVENT_READ, + ); + } + let _ = context::contexts_mut().remove(&ContextRef(context_lock)); + context::switch(); unreachable!(); } -pub fn getpid() -> Result { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - Ok(context.id) -} +pub fn mprotect(address: usize, size: usize, flags: MapFlags) -> Result<()> { + // println!("mprotect {:#X}, {}, {:#X}", address, size, flags); -pub fn getpgid(pid: ContextId) -> Result { - let contexts = context::contexts(); - let context_lock = if pid.into() == 0 { - contexts.current().ok_or(Error::new(ESRCH))? - } else { - contexts.get(pid).ok_or(Error::new(ESRCH))? 
- }; - let context = context_lock.read(); - Ok(context.pgid) -} + let span = PageSpan::validate_nonempty(VirtualAddress::new(address), size) + .ok_or(Error::new(EINVAL))?; -pub fn getppid() -> Result { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - Ok(context.ppid) + AddrSpace::current()?.mprotect(span, flags) } -pub fn kill(pid: ContextId, sig: usize) -> Result { - let (ruid, euid, current_pgid) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - (context.ruid, context.euid, context.pgid) - }; - - if sig < 0x7F { - let mut found = 0; - let mut sent = 0; +pub unsafe fn usermode_bootstrap(bootstrap: &Bootstrap) { + assert_ne!(bootstrap.page_count, 0); - { - let contexts = context::contexts(); - - let send = |context: &mut context::Context| -> bool { - if euid == 0 - || euid == context.ruid - || ruid == context.ruid - { - // If sig = 0, test that process exists and can be - // signalled, but don't send any signal. - if sig != 0 { - //TODO: sigprocmask - context.pending.push_back(sig as u8); - // Convert stopped processes to blocked if sending SIGCONT - if sig == SIGCONT { - if let context::Status::Stopped(_sig) = context.status { - context.status = context::Status::Blocked; - } - } - } - true - } else { - false - } - }; - - if pid.into() as isize > 0 { - // Send to a single process - if let Some(context_lock) = contexts.get(pid) { - let mut context = context_lock.write(); - - found += 1; - if send(&mut context) { - sent += 1; - } - } - } else if pid.into() as isize == -1 { - // Send to every process with permission, except for init - for (_id, context_lock) in contexts.iter() { - let mut context = context_lock.write(); - - if context.id.into() > 2 { - found += 1; - - if send(&mut context) { - sent += 1; - } - } - } - } else { - let pgid = if pid.into() == 0 { - current_pgid - } else { - ContextId::from(-(pid.into() as isize) as usize) - }; - - // Send to every process in the process group whose ID - for (_id, context_lock) in contexts.iter() { - let mut context = context_lock.write(); - - if context.pgid == pgid { - found += 1; - - if send(&mut context) { - sent += 1; - } - } - } - } - } - - if found == 0 { - Err(Error::new(ESRCH)) - } else if sent == 0 { - Err(Error::new(EPERM)) - } else { - // Switch to ensure delivery to self - unsafe { context::switch(); } - - Ok(0) - } - } else { - Err(Error::new(EINVAL)) - } -} - -pub fn mprotect(address: usize, size: usize, flags: usize) -> Result { - println!("mprotect {:#X}, {}, {:#X}", address, size, flags); - - let end_offset = size.checked_sub(1).ok_or(Error::new(EFAULT))?; - let end_address = address.checked_add(end_offset).ok_or(Error::new(EFAULT))?; - - let mut active_table = unsafe { ActivePageTable::new() }; - - let mut flush_all = MapperFlushAll::new(); - - let start_page = Page::containing_address(VirtualAddress::new(address)); - let end_page = Page::containing_address(VirtualAddress::new(end_address)); - for page in Page::range_inclusive(start_page, end_page) { - if let Some(mut page_flags) = active_table.translate_page_flags(page) { - if flags & PROT_EXEC > 0 { - page_flags.remove(EntryFlags::NO_EXECUTE); - } else { - page_flags.insert(EntryFlags::NO_EXECUTE); - } - - if flags & PROT_WRITE > 0 { - //TODO: Not allowing gain of write privileges - } else { - page_flags.remove(EntryFlags::WRITABLE); - } - - if flags & PROT_READ > 0 { - //TODO: No flags 
for readable pages - } else { - //TODO: No flags for readable pages - } - - let flush = active_table.remap(page, page_flags); - flush_all.consume(flush); - } else { - return Err(Error::new(EFAULT)); - } - } - - flush_all.flush(&mut active_table); - - Ok(0) -} - -pub fn setpgid(pid: ContextId, pgid: ContextId) -> Result { - let contexts = context::contexts(); - - let current_pid = { - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - context.id - }; - - let context_lock = if pid.into() == 0 { - contexts.current().ok_or(Error::new(ESRCH))? - } else { - contexts.get(pid).ok_or(Error::new(ESRCH))? - }; - - let mut context = context_lock.write(); - if context.id == current_pid || context.ppid == current_pid { - if pgid.into() == 0 { - context.pgid = context.id; - } else { - context.pgid = pgid; - } - Ok(0) - } else { - Err(Error::new(ESRCH)) - } -} - -pub fn sigaction(sig: usize, act_opt: Option<&SigAction>, oldact_opt: Option<&mut SigAction>, restorer: usize) -> Result { - if sig > 0 && sig <= 0x7F { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - let mut actions = context.actions.lock(); - - if let Some(oldact) = oldact_opt { - *oldact = actions[sig].0; - } - - if let Some(act) = act_opt { - actions[sig] = (*act, restorer); - } - - Ok(0) - } else { - Err(Error::new(EINVAL)) - } -} - -pub fn sigprocmask(how: usize, mask_opt: Option<&[u64; 2]>, oldmask_opt: Option<&mut [u64; 2]>) -> Result { { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); - - if let Some(oldmask) = oldmask_opt { - *oldmask = context.sigmask; - } - - if let Some(mask) = mask_opt { - match how { - SIG_BLOCK => { - context.sigmask[0] |= mask[0]; - context.sigmask[1] |= mask[1]; - }, - SIG_UNBLOCK => { - context.sigmask[0] &= !mask[0]; - context.sigmask[1] &= !mask[1]; - }, - SIG_SETMASK => { - context.sigmask[0] = mask[0]; - context.sigmask[1] = mask[1]; + let addr_space = Arc::clone( + context::current() + .read() + .addr_space() + .expect("expected bootstrap context to have an address space"), + ); + + let base = Page::containing_address(VirtualAddress::new(PAGE_SIZE)); + let flags = MapFlags::MAP_FIXED_NOREPLACE + | MapFlags::PROT_EXEC + | MapFlags::PROT_READ + | MapFlags::PROT_WRITE; + + let page_count = + NonZeroUsize::new(bootstrap.page_count).expect("bootstrap contained no pages!"); + + let _base_page = addr_space + .acquire_write() + .mmap( + &addr_space, + Some(base), + page_count, + flags, + &mut Vec::new(), + |page, flags, mapper, flusher| { + let shared = false; + Ok(Grant::zeroed( + PageSpan::new(page, bootstrap.page_count), + flags, + mapper, + flusher, + shared, + )?) 
}, - _ => { - return Err(Error::new(EINVAL)); - } - } - } + ) + .expect("Failed to allocate bootstrap pages"); } - Ok(0) -} -pub fn sigreturn() -> Result { - { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); - context.ksig_restore = true; - context.block(); - } + let bootstrap_slice = unsafe { bootstrap_mem(bootstrap) }; + UserSliceWo::new(PAGE_SIZE, bootstrap.page_count * PAGE_SIZE) + .expect("failed to create bootstrap user slice") + .copy_from_slice(bootstrap_slice) + .expect("failed to copy memory to bootstrap"); - let _ = unsafe { context::switch() }; + let bootstrap_entry = u64::from_le_bytes(bootstrap_slice[0x1a..0x22].try_into().unwrap()); + log::info!("Bootstrap entry point: {:X}", bootstrap_entry); + assert_ne!(bootstrap_entry, 0); + println!("\n"); - unreachable!(); -} + // Start in a minimal environment without any stack. -pub fn umask(mask: usize) -> Result { - let previous; + match context::current() + .write() + .regs_mut() + .expect("bootstrap needs registers to be available") { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); - previous = context.umask; - context.umask = mask; - } - - Ok(previous) -} - -fn reap(pid: ContextId) -> Result { - // Spin until not running - let mut running = true; - while running { - { - let contexts = context::contexts(); - let context_lock = contexts.get(pid).ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - running = context.running; + ref mut regs => { + regs.init(); + regs.set_instr_pointer(bootstrap_entry.try_into().unwrap()); } - - interrupt::pause(); - } - - let mut contexts = context::contexts_mut(); - let context_lock = contexts.remove(pid).ok_or(Error::new(ESRCH))?; - { - let mut context = context_lock.write(); - empty(&mut context, true); } - drop(context_lock); - - Ok(pid) } -pub fn waitpid(pid: ContextId, status_ptr: usize, flags: usize) -> Result { - let (ppid, waitpid) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - (context.id, Arc::clone(&context.waitpid)) - }; - - let mut tmp = [0]; - let status_slice = if status_ptr != 0 { - validate_slice_mut(status_ptr as *mut usize, 1)? - } else { - &mut tmp - }; - - let mut grim_reaper = |w_pid: ContextId, status: usize| -> Option> { - if wifcontinued(status) { - if flags & WCONTINUED == WCONTINUED { - status_slice[0] = status; - Some(Ok(w_pid)) - } else { - None - } - } else if wifstopped(status) { - if flags & WUNTRACED == WUNTRACED { - status_slice[0] = status; - Some(Ok(w_pid)) - } else { - None - } - } else { - status_slice[0] = status; - Some(reap(w_pid)) - } - }; - - loop { - let res_opt = if pid.into() == 0 { - // Check for existence of child - { - let mut found = false; - - let contexts = context::contexts(); - for (_id, context_lock) in contexts.iter() { - let context = context_lock.read(); - if context.ppid == ppid { - found = true; - break; - } - } - - if ! 
found { - return Err(Error::new(ECHILD)); - } - } - - if flags & WNOHANG == WNOHANG { - if let Some((_wid, (w_pid, status))) = waitpid.receive_any_nonblock() { - grim_reaper(w_pid, status) - } else { - Some(Ok(ContextId::from(0))) - } - } else { - let (_wid, (w_pid, status)) = waitpid.receive_any(); - grim_reaper(w_pid, status) - } - } else if (pid.into() as isize) < 0 { - let pgid = ContextId::from(-(pid.into() as isize) as usize); - - // Check for existence of child in process group PGID - { - let mut found = false; - - let contexts = context::contexts(); - for (_id, context_lock) in contexts.iter() { - let context = context_lock.read(); - if context.pgid == pgid { - found = true; - break; - } - } - - if ! found { - return Err(Error::new(ECHILD)); - } - } - - if flags & WNOHANG == WNOHANG { - if let Some((w_pid, status)) = waitpid.receive_nonblock(&WaitpidKey { - pid: None, - pgid: Some(pgid) - }) { - grim_reaper(w_pid, status) - } else { - Some(Ok(ContextId::from(0))) - } - } else { - let (w_pid, status) = waitpid.receive(&WaitpidKey { - pid: None, - pgid: Some(pgid) - }); - grim_reaper(w_pid, status) - } - } else { - let hack_status = { - let contexts = context::contexts(); - let context_lock = contexts.get(pid).ok_or(Error::new(ECHILD))?; - let mut context = context_lock.write(); - if context.ppid != ppid { - println!("TODO: Hack for rustc - changing ppid of {} from {} to {}", context.id.into(), context.ppid.into(), ppid.into()); - context.ppid = ppid; - //return Err(Error::new(ECHILD)); - Some(context.status) - } else { - None - } - }; - - if let Some(context::Status::Exited(status)) = hack_status { - let _ = waitpid.receive_nonblock(&WaitpidKey { - pid: Some(pid), - pgid: None - }); - grim_reaper(pid, status) - } else if flags & WNOHANG == WNOHANG { - if let Some((w_pid, status)) = waitpid.receive_nonblock(&WaitpidKey { - pid: Some(pid), - pgid: None - }) { - grim_reaper(w_pid, status) - } else { - Some(Ok(ContextId::from(0))) - } - } else { - let (w_pid, status) = waitpid.receive(&WaitpidKey { - pid: Some(pid), - pgid: None - }); - grim_reaper(w_pid, status) - } - }; - - if let Some(res) = res_opt { - return res; - } - } +pub unsafe fn bootstrap_mem(bootstrap: &crate::Bootstrap) -> &'static [u8] { + core::slice::from_raw_parts( + CurrentRmmArch::phys_to_virt(bootstrap.base.base()).data() as *const u8, + bootstrap.page_count * PAGE_SIZE, + ) } diff --git a/src/syscall/time.rs b/src/syscall/time.rs index 4bad432c..ddf6b238 100644 --- a/src/syscall/time.rs +++ b/src/syscall/time.rs @@ -1,49 +1,82 @@ -use crate::time; -use crate::context; -use crate::syscall::data::TimeSpec; -use crate::syscall::error::*; -use crate::syscall::flag::{CLOCK_REALTIME, CLOCK_MONOTONIC}; +use crate::{ + context, + syscall::{ + data::TimeSpec, + error::*, + flag::{CLOCK_MONOTONIC, CLOCK_REALTIME}, + }, + time, +}; -pub fn clock_gettime(clock: usize, time: &mut TimeSpec) -> Result { +use super::usercopy::{UserSliceRo, UserSliceWo}; + +pub fn clock_gettime(clock: usize, buf: UserSliceWo) -> Result<()> { let arch_time = match clock { CLOCK_REALTIME => time::realtime(), CLOCK_MONOTONIC => time::monotonic(), - _ => return Err(Error::new(EINVAL)) + _ => return Err(Error::new(EINVAL)), }; - time.tv_sec = arch_time.0 as i64; - time.tv_nsec = arch_time.1 as i32; - Ok(0) + buf.copy_exactly(&TimeSpec { + tv_sec: (arch_time / time::NANOS_PER_SEC) as i64, + tv_nsec: (arch_time % time::NANOS_PER_SEC) as i32, + }) } /// Nanosleep will sleep by switching the current context -pub fn nanosleep(req: &TimeSpec, rem_opt: 
Option<&mut TimeSpec>) -> Result { - //start is a tuple of (seconds, nanoseconds) +pub fn nanosleep(req_buf: UserSliceRo, rem_buf_opt: Option) -> Result<()> { + let req = unsafe { req_buf.read_exact::()? }; + let start = time::monotonic(); - let sum = start.1 + req.tv_nsec as u64; - let end = (start.0 + req.tv_sec as u64 + sum / 1_000_000_000, sum % 1_000_000_000); + let end = start + (req.tv_sec as u128 * time::NANOS_PER_SEC) + (req.tv_nsec as u128); + let current_context = context::current(); { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); + let mut context = current_context.write(); + + if let Some((tctl, pctl, _)) = context.sigcontrol() { + if tctl.currently_pending_unblocked(pctl) != 0 { + return Err(Error::new(EINTR)); + } + } context.wake = Some(end); - context.block(); + context.block("nanosleep"); } - unsafe { context::switch(); } + // TODO: The previous wakeup reason was most likely signals, but is there any other possible + // reason? + context::switch(); - if let Some(rem) = rem_opt { - //TODO let current = time::monotonic(); - rem.tv_sec = 0; - rem.tv_nsec = 0; + let was_interrupted = current_context.write().wake.take().is_some(); + + if let Some(rem_buf) = rem_buf_opt { + let current = time::monotonic(); + + rem_buf.copy_exactly(&if current < end { + let diff = end - current; + TimeSpec { + tv_sec: (diff / time::NANOS_PER_SEC) as i64, + tv_nsec: (diff % time::NANOS_PER_SEC) as i32, + } + } else { + TimeSpec { + tv_sec: 0, + tv_nsec: 0, + } + })?; } - Ok(0) + if was_interrupted { + Err(Error::new(EINTR)) + } else { + Ok(()) + } } -pub fn sched_yield() -> Result { - unsafe { context::switch(); } - Ok(0) +pub fn sched_yield() -> Result<()> { + context::switch(); + // TODO: Do this check in userspace + context::signal::signal_handler(); + Ok(()) } diff --git a/src/syscall/usercopy.rs b/src/syscall/usercopy.rs new file mode 100644 index 00000000..84d11909 --- /dev/null +++ b/src/syscall/usercopy.rs @@ -0,0 +1,260 @@ +use syscall::dirent::Buffer; + +use crate::{ + context::memory::PageSpan, + memory::PAGE_SIZE, + paging::{Page, VirtualAddress}, +}; + +use crate::arch::{arch_copy_from_user, arch_copy_to_user}; + +use crate::syscall::error::{Error, Result, EFAULT, EINVAL}; + +#[derive(Clone, Copy)] +pub struct UserSlice { + base: usize, + len: usize, +} +pub type UserSliceRo = UserSlice; +pub type UserSliceWo = UserSlice; +pub type UserSliceRw = UserSlice; + +impl UserSlice { + pub fn empty() -> Self { + Self { base: 0, len: 0 } + } + pub fn len(&self) -> usize { + self.len + } + pub fn is_empty(&self) -> bool { + self.len == 0 + } + pub fn addr(&self) -> usize { + self.base + } + pub fn new(base: usize, len: usize) -> Result { + if base >= crate::USER_END_OFFSET || base.saturating_add(len) >= crate::USER_END_OFFSET { + return Err(Error::new(EFAULT)); + } + + Ok(Self { base, len }) + } + /// Split [0, end) into [0, idx) and [idx, end) + pub fn split_at(self, idx: usize) -> Option<(Self, Self)> { + if idx > self.len { + return None; + } + Some(( + Self { + base: self.base, + len: idx, + }, + Self { + base: self.base + idx, + len: self.len - idx, + }, + )) + } + pub fn advance(self, by: usize) -> Option { + Some(self.split_at(by)?.1) + } + pub fn limit(self, to: usize) -> Option { + Some(self.split_at(to)?.0) + } + pub fn none_if_null(self) -> Option { + if self.addr() == 0 { + None + } else { + Some(self) + } + } + /// Not unsafe, because user memory is not covered by the memory model 
that decides if + /// something is UB, but it can break logic invariants + pub fn reinterpret_unchecked( + self, + ) -> UserSlice { + UserSlice { + base: self.base, + len: self.len, + } + } + pub fn in_variable_chunks(self, chunk_size: usize) -> impl Iterator { + (0..self.len()).step_by(chunk_size).map(move |i| { + self.advance(i) + .expect("already limited by length, must succeed") + }) + } + pub fn in_exact_chunks(self, chunk_size: usize) -> impl Iterator { + (0..self.len().div_floor(chunk_size)).map(move |i| { + self.advance(i * chunk_size) + .expect("already limited by length, must succeed") + .limit(chunk_size) + .expect("length is aligned") + }) + } +} +impl UserSlice { + pub fn copy_to_slice(self, slice: &mut [u8]) -> Result<()> { + debug_assert!(is_kernel_mem(slice)); + + if self.len != slice.len() { + return Err(Error::new(EINVAL)); + } + + if unsafe { arch_copy_from_user(slice.as_mut_ptr() as usize, self.base, self.len) } == 0 { + Ok(()) + } else { + Err(Error::new(EFAULT)) + } + } + pub unsafe fn read_exact(self) -> Result { + let mut t: T = core::mem::zeroed(); + let slice = unsafe { + core::slice::from_raw_parts_mut( + (&mut t as *mut T).cast::(), + core::mem::size_of::(), + ) + }; + + self.limit(core::mem::size_of::()) + .ok_or(Error::new(EINVAL))? + .copy_to_slice(slice)?; + + Ok(t) + } + pub fn copy_common_bytes_to_slice(self, slice: &mut [u8]) -> Result { + let min = core::cmp::min(self.len(), slice.len()); + self.limit(min) + .expect("min(len, x) is always <= len") + .copy_to_slice(&mut slice[..min])?; + Ok(min) + } + // TODO: Merge int IO functions? + pub fn read_usize(self) -> Result { + let mut ret = 0_usize.to_ne_bytes(); + self.limit(core::mem::size_of::()) + .ok_or(Error::new(EINVAL))? + .copy_to_slice(&mut ret)?; + Ok(usize::from_ne_bytes(ret)) + } + pub fn read_u32(self) -> Result { + let mut ret = 0_u32.to_ne_bytes(); + self.limit(4) + .ok_or(Error::new(EINVAL))? + .copy_to_slice(&mut ret)?; + Ok(u32::from_ne_bytes(ret)) + } + pub fn read_u64(self) -> Result { + let mut ret = 0_u64.to_ne_bytes(); + self.limit(8) + .ok_or(Error::new(EINVAL))? + .copy_to_slice(&mut ret)?; + Ok(u64::from_ne_bytes(ret)) + } + pub fn usizes(self) -> impl Iterator> { + self.in_exact_chunks(core::mem::size_of::()) + .map(Self::read_usize) + } +} +impl UserSlice { + pub fn copy_from_slice(self, slice: &[u8]) -> Result<()> { + // A zero sized slice will like have 0x1 as address + debug_assert!(is_kernel_mem(slice) || slice.len() == 0); + + if self.len != slice.len() { + return Err(Error::new(EINVAL)); + } + + if unsafe { arch_copy_to_user(self.base, slice.as_ptr() as usize, self.len) } == 0 { + Ok(()) + } else { + Err(Error::new(EFAULT)) + } + } + pub fn copy_common_bytes_from_slice(self, slice: &[u8]) -> Result { + let min = core::cmp::min(self.len(), slice.len()); + self.limit(min) + .expect("min(len, x) is always <= len") + .copy_from_slice(&slice[..min])?; + Ok(min) + } + pub fn copy_exactly(self, slice: &[u8]) -> Result<()> { + self.limit(slice.len()) + .ok_or(Error::new(EINVAL))? + .copy_from_slice(slice)?; + Ok(()) + } + pub fn write_usize(self, word: usize) -> Result<()> { + self.limit(core::mem::size_of::()) + .ok_or(Error::new(EINVAL))? + .copy_from_slice(&word.to_ne_bytes())?; + Ok(()) + } + pub fn write_u32(self, int: u32) -> Result<()> { + self.limit(core::mem::size_of::()) + .ok_or(Error::new(EINVAL))? 
+ .copy_from_slice(&int.to_ne_bytes())?; + Ok(()) + } +} + +impl UserSliceRo { + pub fn ro(base: usize, size: usize) -> Result { + Self::new(base, size) + } +} +impl UserSliceWo { + pub fn wo(base: usize, size: usize) -> Result { + Self::new(base, size) + } +} +impl UserSliceRw { + pub fn rw(base: usize, size: usize) -> Result { + Self::new(base, size) + } +} + +fn is_kernel_mem(slice: &[u8]) -> bool { + (slice.as_ptr() as usize) >= crate::USER_END_OFFSET + && (slice.as_ptr() as usize).checked_add(slice.len()).is_some() +} + +/// Convert `[addr, addr+size)` into `(page, page_count)`. +/// +/// This will fail if: +/// +/// - the base address is not page-aligned, +/// - the length is not page-aligned, +/// - the region is empty (EINVAL), or +/// - any byte in the region exceeds USER_END_OFFSET (EFAULT). +pub fn validate_region(address: usize, size: usize) -> Result { + if address % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 || size == 0 { + return Err(Error::new(EINVAL)); + } + if address.saturating_add(size) > crate::USER_END_OFFSET { + return Err(Error::new(EFAULT)); + } + Ok(PageSpan::new( + Page::containing_address(VirtualAddress::new(address)), + size / PAGE_SIZE, + )) +} +impl Buffer<'static> for UserSliceWo { + fn empty() -> Self { + UserSliceWo::empty() + } + fn length(&self) -> usize { + self.len() + } + fn split_at(self, index: usize) -> Option<[Self; 2]> { + let (a, b) = self.split_at(index)?; + Some([a, b]) + } + fn copy_from_slice_exact(self, src: &[u8]) -> Result<()> { + self.copy_exactly(src) + } + fn zero_out(self) -> Result<()> { + // TODO: Implement this. Don't need to as long as the header size is constant, for now. + Ok(()) + } +} diff --git a/src/syscall/validate.rs b/src/syscall/validate.rs deleted file mode 100644 index beb13b05..00000000 --- a/src/syscall/validate.rs +++ /dev/null @@ -1,48 +0,0 @@ -use core::{mem, slice}; - -use crate::paging::{ActivePageTable, Page, VirtualAddress}; -use crate::paging::entry::EntryFlags; -use crate::syscall::error::*; - -fn validate(address: usize, size: usize, flags: EntryFlags) -> Result<()> { - let end_offset = size.checked_sub(1).ok_or(Error::new(EFAULT))?; - let end_address = address.checked_add(end_offset).ok_or(Error::new(EFAULT))?; - - let active_table = unsafe { ActivePageTable::new() }; - - let start_page = Page::containing_address(VirtualAddress::new(address)); - let end_page = Page::containing_address(VirtualAddress::new(end_address)); - for page in Page::range_inclusive(start_page, end_page) { - if let Some(page_flags) = active_table.translate_page_flags(page) { - if ! 
page_flags.contains(flags) { - //println!("{:X}: Not {:?}", page.start_address().get(), flags); - return Err(Error::new(EFAULT)); - } - } else { - //println!("{:X}: Not found", page.start_address().get()); - return Err(Error::new(EFAULT)); - } - } - - Ok(()) -} - -/// Convert a pointer and length to slice, if valid -pub fn validate_slice(ptr: *const T, len: usize) -> Result<&'static [T]> { - if len == 0 { - Ok(&[]) - } else { - validate(ptr as usize, len * mem::size_of::(), EntryFlags::PRESENT | EntryFlags::USER_ACCESSIBLE)?; - Ok(unsafe { slice::from_raw_parts(ptr, len) }) - } -} - -/// Convert a pointer and length to slice, if valid -pub fn validate_slice_mut(ptr: *mut T, len: usize) -> Result<&'static mut [T]> { - if len == 0 { - Ok(&mut []) - } else { - validate(ptr as usize, len * mem::size_of::(), EntryFlags::PRESENT | EntryFlags::WRITABLE | EntryFlags::USER_ACCESSIBLE)?; - Ok(unsafe { slice::from_raw_parts_mut(ptr, len) }) - } -} diff --git a/src/tests/mod.rs b/src/tests/mod.rs deleted file mode 100644 index 3432dcd8..00000000 --- a/src/tests/mod.rs +++ /dev/null @@ -1,29 +0,0 @@ -use syscall::{self, Error}; - -/// Test stdio -#[test] -fn stdio() { - // Test opening stdin - assert_eq!(syscall::open(b"debug:", 0), Ok(0)); - - // Test opening stdout - assert_eq!(syscall::open(b"debug:", 0), Ok(1)); - - // Test opening stderr - assert_eq!(syscall::open(b"debug:", 0), Ok(2)); - - // Test writing stdout - let stdout_str = b"STDOUT"; - assert_eq!(syscall::write(1, stdout_str), Ok(stdout_str.len())); - - // Test writing stderr - let stderr_str = b"STDERR"; - assert_eq!(syscall::write(2, stderr_str), Ok(stderr_str.len())); -} - -/// Test that invalid reads/writes cause errors -#[test] -fn invalid_path() { - assert_eq!(syscall::read(999, &mut []), Err(Error::new(syscall::EBADF))); - assert_eq!(syscall::write(999, &[]), Err(Error::new(syscall::EBADF))); -} diff --git a/src/time.rs b/src/time.rs index 6fd7720a..2de43b41 100644 --- a/src/time.rs +++ b/src/time.rs @@ -1,17 +1,25 @@ use spin::Mutex; -/// Kernel start time, measured in (seconds, nanoseconds) since Unix epoch -pub static START: Mutex<(u64, u64)> = Mutex::new((0, 0)); -/// Kernel up time, measured in (seconds, nanoseconds) since `START_TIME` -pub static OFFSET: Mutex<(u64, u64)> = Mutex::new((0, 0)); +use crate::syscall::error::{Error, Result, EINVAL}; -pub fn monotonic() -> (u64, u64) { - *OFFSET.lock() +pub const NANOS_PER_SEC: u128 = 1_000_000_000; + +// TODO: seqlock? 
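// For reference, the u128-nanosecond convention introduced here is split back into a
// TimeSpec the same way the new clock_gettime/nanosleep code does it. A minimal sketch,
// with the TimeSpec fields mirrored from the diff (i64 seconds, i32 nanoseconds):
const NANOS_PER_SEC: u128 = 1_000_000_000;

#[derive(Debug, PartialEq)]
struct TimeSpec {
    tv_sec: i64,
    tv_nsec: i32,
}

fn timespec_from_nanos(nanos: u128) -> TimeSpec {
    TimeSpec {
        tv_sec: (nanos / NANOS_PER_SEC) as i64,
        tv_nsec: (nanos % NANOS_PER_SEC) as i32,
    }
}

fn main() {
    // 2.25 seconds expressed as nanoseconds.
    assert_eq!(
        timespec_from_nanos(2_250_000_000),
        TimeSpec { tv_sec: 2, tv_nsec: 250_000_000 }
    );
}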
+/// Kernel start time, measured in nanoseconds since Unix epoch +pub static START: Mutex = Mutex::new(0); +/// Kernel up time, measured in nanoseconds since `START_TIME` +pub static OFFSET: Mutex = Mutex::new(0); + +pub fn monotonic() -> u128 { + crate::arch::time::monotonic_absolute() +} + +pub fn realtime() -> u128 { + *START.lock() + monotonic() } -pub fn realtime() -> (u64, u64) { - let offset = monotonic(); - let start = *START.lock(); - let sum = start.1 + offset.1; - (start.0 + offset.0 + sum / 1_000_000_000, sum % 1_000_000_000) +pub fn sys_update_time_offset(buf: &[u8]) -> Result { + let start = <[u8; 16]>::try_from(buf).map_err(|_| Error::new(EINVAL))?; + *START.lock() = u128::from_ne_bytes(start); + Ok(16) } diff --git a/syscall b/syscall deleted file mode 160000 index 9e9f47d2..00000000 --- a/syscall +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9e9f47d2a570c55dd96cd80c83bc818d63cab8af diff --git a/targets/aarch64-unknown-kernel.json b/targets/aarch64-unknown-kernel.json new file mode 100644 index 00000000..74e4b9f0 --- /dev/null +++ b/targets/aarch64-unknown-kernel.json @@ -0,0 +1,24 @@ +{ + "llvm-target": "aarch64-unknown-none", + "abi": "softfloat", + "target-endian": "little", + "target-pointer-width": "64", + "target-c-int-width": "32", + "data-layout": "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32", + "arch": "aarch64", + "os": "none", + "env": "", + "vendor": "unknown", + "linker": "rust-lld", + "linker-flavor": "gnu-lld", + "features": "+strict-align,-neon,-fp-armv8,+tpidr-el1", + "dynamic-linking": false, + "executables": true, + "relocation-model": "pic", + "disable-redzone": true, + "frame-pointer": "always", + "exe-suffix": "", + "has-rpath": false, + "no-default-libraries": true, + "position-independent-executables": false +} diff --git a/targets/aarch64-unknown-none.json b/targets/aarch64-unknown-none.json deleted file mode 100644 index e819a5ef..00000000 --- a/targets/aarch64-unknown-none.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "llvm-target": "aarch64-unknown-none", - "target-endian": "little", - "target-pointer-width": "64", - "target-c-int-width": "32", - "data-layout": "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", - "arch": "aarch64", - "os": "none", - "env": "", - "vendor": "unknown", - "linker-flavor": "gcc", - "target-family": "redox", - "pre-link-args": ["-m64", "-nostdlib", "-static"], - "features": "+a53,+strict-align,-fp-armv8", - "dynamic-linking": false, - "executables": false, - "relocation-model": "pic", - "code-model": "large", - "disable-redzone": true, - "eliminate-frame-pointer": false, - "exe-suffix": "", - "has-rpath": false, - "no-compiler-rt": true, - "no-default-libraries": true, - "position-independent-executables": false, - "has-elf-tls": true -} diff --git a/targets/arm-unknown-none.json b/targets/arm-unknown-none.json deleted file mode 100644 index dad0ed5a..00000000 --- a/targets/arm-unknown-none.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "llvm-target": "arm-unknown-none", - "target-endian": "little", - "target-pointer-width": "32", - "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64", - "arch": "arm", - "os": "none", - "env": "", - "vendor": "unknown", - "linker-flavor": "gcc", - "target-family": "redox", - "pre-link-args": ["-nostdlib", "-static"], - "features": "+soft-float", - "dynamic-linking": false, - "executables": false, - "relocation-model": "static", - "code-model": "kernel", - "disable-redzone": true, - "eliminate-frame-pointer": false, - "exe-suffix": "", - "has-rpath": false, - 
"no-compiler-rt": true, - "no-default-libraries": true, - "position-independent-executables": false, - "has-elf-tls": true -} diff --git a/targets/i686-unknown-kernel.json b/targets/i686-unknown-kernel.json new file mode 100644 index 00000000..088ec6b8 --- /dev/null +++ b/targets/i686-unknown-kernel.json @@ -0,0 +1,24 @@ +{ + "llvm-target": "i686-unknown-none", + "target-endian": "little", + "target-pointer-width": "32", + "target-c-int-width": "32", + "data-layout": "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128", + "arch": "x86", + "os": "none", + "env": "", + "vendor": "unknown", + "linker": "rust-lld", + "linker-flavor": "gnu-lld", + "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-3dnow,-3dnowa,-avx,-avx2,+soft-float", + "dynamic-linking": false, + "executables": true, + "relocation-model": "static", + "code-model": "kernel", + "disable-redzone": true, + "frame-pointer": "always", + "exe-suffix": "", + "has-rpath": false, + "no-default-libraries": true, + "position-independent-executables": false +} diff --git a/targets/riscv64-unknown-kernel.json b/targets/riscv64-unknown-kernel.json new file mode 100644 index 00000000..c9ac6f62 --- /dev/null +++ b/targets/riscv64-unknown-kernel.json @@ -0,0 +1,24 @@ +{ + "llvm-target": "riscv64-unknown-none", + "llvm-abiname": "lp64", + "target-endian": "little", + "target-pointer-width": "64", + "target-c-int-width": "32", + "data-layout": "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128", + "arch": "riscv64", + "os": "none", + "env": "", + "vendor": "unknown", + "linker": "rust-lld", + "linker-flavor": "gnu-lld", + "features": "+m,+a,+c,+zihintpause", + "dynamic-linking": false, + "executables": true, + "relocation-model": "pic", + "disable-redzone": true, + "frame-pointer": "always", + "exe-suffix": "", + "has-rpath": false, + "no-default-libraries": true, + "position-independent-executables": false +} diff --git a/targets/x86_64-unknown-kernel.json b/targets/x86_64-unknown-kernel.json new file mode 100644 index 00000000..848ef0e1 --- /dev/null +++ b/targets/x86_64-unknown-kernel.json @@ -0,0 +1,24 @@ +{ + "llvm-target": "x86_64-unknown-none", + "target-endian": "little", + "target-pointer-width": "64", + "target-c-int-width": "32", + "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", + "arch": "x86_64", + "os": "none", + "env": "", + "vendor": "unknown", + "linker": "rust-lld", + "linker-flavor": "gnu-lld", + "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,+soft-float", + "dynamic-linking": false, + "executables": true, + "relocation-model": "static", + "code-model": "kernel", + "disable-redzone": true, + "frame-pointer": "always", + "exe-suffix": "", + "has-rpath": false, + "no-default-libraries": true, + "position-independent-executables": false +} diff --git a/targets/x86_64-unknown-none.json b/targets/x86_64-unknown-none.json deleted file mode 100644 index b64c08b6..00000000 --- a/targets/x86_64-unknown-none.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "llvm-target": "x86_64-unknown-none", - "target-endian": "little", - "target-pointer-width": "64", - "target-c-int-width": "32", - "data-layout": "e-m:e-i64:64-f80:128-n8:16:32:64-S128", - "arch": "x86_64", - "os": "none", - "env": "", - "vendor": "unknown", - "linker-flavor": "gcc", - "target-family": "redox", - "pre-link-args": { - "gcc": ["-m64", "-nostdlib", "-static"] - }, - "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-3dnow,-3dnowa,-avx,-avx2,+soft-float", - 
"dynamic-linking": false, - "executables": false, - "relocation-model": "pic", - "code-model": "kernel", - "disable-redzone": true, - "eliminate-frame-pointer": false, - "exe-suffix": "", - "has-rpath": false, - "no-compiler-rt": true, - "no-default-libraries": true, - "position-independent-executables": false, - "has-elf-tls": true -}