diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 5e6b74f30406..1e7e0bb4c14e 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -52,6 +52,9 @@ Description: echo 0 > /sys/class/devfreq/.../trans_stat + If the transition table is bigger than PAGE_SIZE, reading + this will return an -EFBIG error. + What: /sys/class/devfreq/.../available_frequencies Date: October 2012 Contact: Nishanth Menon diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues index 978b76358661..40d5aab8452d 100644 --- a/Documentation/ABI/testing/sysfs-class-net-queues +++ b/Documentation/ABI/testing/sysfs-class-net-queues @@ -1,4 +1,4 @@ -What: /sys/class//queues/rx-/rps_cpus +What: /sys/class/net//queues/rx-/rps_cpus Date: March 2010 KernelVersion: 2.6.35 Contact: netdev@vger.kernel.org @@ -8,7 +8,7 @@ Description: network device queue. Possible values depend on the number of available CPU(s) in the system. -What: /sys/class//queues/rx-/rps_flow_cnt +What: /sys/class/net//queues/rx-/rps_flow_cnt Date: April 2010 KernelVersion: 2.6.35 Contact: netdev@vger.kernel.org @@ -16,7 +16,7 @@ Description: Number of Receive Packet Steering flows being currently processed by this particular network device receive queue. -What: /sys/class//queues/tx-/tx_timeout +What: /sys/class/net//queues/tx-/tx_timeout Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -24,7 +24,7 @@ Description: Indicates the number of transmit timeout events seen by this network interface transmit queue. -What: /sys/class//queues/tx-/tx_maxrate +What: /sys/class/net//queues/tx-/tx_maxrate Date: March 2015 KernelVersion: 4.1 Contact: netdev@vger.kernel.org @@ -32,7 +32,7 @@ Description: A Mbps max-rate set for the queue, a value of zero means disabled, default is disabled. -What: /sys/class//queues/tx-/xps_cpus +What: /sys/class/net//queues/tx-/xps_cpus Date: November 2010 KernelVersion: 2.6.38 Contact: netdev@vger.kernel.org @@ -42,7 +42,7 @@ Description: network device transmit queue. Possible vaules depend on the number of available CPU(s) in the system. -What: /sys/class//queues/tx-/xps_rxqs +What: /sys/class/net//queues/tx-/xps_rxqs Date: June 2018 KernelVersion: 4.18.0 Contact: netdev@vger.kernel.org @@ -53,7 +53,7 @@ Description: number of available receive queue(s) in the network device. Default is disabled. -What: /sys/class//queues/tx-/byte_queue_limits/hold_time +What: /sys/class/net//queues/tx-/byte_queue_limits/hold_time Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -62,7 +62,7 @@ Description: of this particular network device transmit queue. Default value is 1000. -What: /sys/class//queues/tx-/byte_queue_limits/inflight +What: /sys/class/net//queues/tx-/byte_queue_limits/inflight Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -70,7 +70,7 @@ Description: Indicates the number of bytes (objects) in flight on this network device transmit queue. -What: /sys/class//queues/tx-/byte_queue_limits/limit +What: /sys/class/net//queues/tx-/byte_queue_limits/limit Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -79,7 +79,7 @@ Description: on this network device transmit queue. This value is clamped to be within the bounds defined by limit_max and limit_min. -What: /sys/class//queues/tx-/byte_queue_limits/limit_max +What: /sys/class/net//queues/tx-/byte_queue_limits/limit_max Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -88,7 +88,7 @@ Description: queued on this network device transmit queue. See include/linux/dynamic_queue_limits.h for the default value. -What: /sys/class//queues/tx-/byte_queue_limits/limit_min +What: /sys/class/net//queues/tx-/byte_queue_limits/limit_min Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics index 55db27815361..53e508c6936a 100644 --- a/Documentation/ABI/testing/sysfs-class-net-statistics +++ b/Documentation/ABI/testing/sysfs-class-net-statistics @@ -1,4 +1,4 @@ -What: /sys/class//statistics/collisions +What: /sys/class/net//statistics/collisions Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -6,7 +6,7 @@ Description: Indicates the number of collisions seen by this network device. This value might not be relevant with all MAC layers. -What: /sys/class//statistics/multicast +What: /sys/class/net//statistics/multicast Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -14,7 +14,7 @@ Description: Indicates the number of multicast packets received by this network device. -What: /sys/class//statistics/rx_bytes +What: /sys/class/net//statistics/rx_bytes Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -23,7 +23,7 @@ Description: See the network driver for the exact meaning of when this value is incremented. -What: /sys/class//statistics/rx_compressed +What: /sys/class/net//statistics/rx_compressed Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -32,7 +32,7 @@ Description: network device. This value might only be relevant for interfaces that support packet compression (e.g: PPP). -What: /sys/class//statistics/rx_crc_errors +What: /sys/class/net//statistics/rx_crc_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -41,7 +41,7 @@ Description: by this network device. Note that the specific meaning might depend on the MAC layer used by the interface. -What: /sys/class//statistics/rx_dropped +What: /sys/class/net//statistics/rx_dropped Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -51,7 +51,7 @@ Description: packet processing. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_errors +What: /sys/class/net//statistics/rx_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -59,7 +59,7 @@ Description: Indicates the number of receive errors on this network device. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_fifo_errors +What: /sys/class/net//statistics/rx_fifo_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -68,7 +68,7 @@ Description: network device. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_frame_errors +What: /sys/class/net//statistics/rx_frame_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -78,7 +78,7 @@ Description: on the MAC layer protocol used. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_length_errors +What: /sys/class/net//statistics/rx_length_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -87,7 +87,7 @@ Description: error, oversized or undersized. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_missed_errors +What: /sys/class/net//statistics/rx_missed_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -96,7 +96,7 @@ Description: due to lack of capacity in the receive side. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_nohandler +What: /sys/class/net//statistics/rx_nohandler Date: February 2016 KernelVersion: 4.6 Contact: netdev@vger.kernel.org @@ -104,7 +104,7 @@ Description: Indicates the number of received packets that were dropped on an inactive device by the network core. -What: /sys/class//statistics/rx_over_errors +What: /sys/class/net//statistics/rx_over_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -114,7 +114,7 @@ Description: (e.g: larger than MTU). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_packets +What: /sys/class/net//statistics/rx_packets Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -122,7 +122,7 @@ Description: Indicates the total number of good packets received by this network device. -What: /sys/class//statistics/tx_aborted_errors +What: /sys/class/net//statistics/tx_aborted_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -132,7 +132,7 @@ Description: a medium collision). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/tx_bytes +What: /sys/class/net//statistics/tx_bytes Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -143,7 +143,7 @@ Description: transmitted packets or all packets that have been queued for transmission. -What: /sys/class//statistics/tx_carrier_errors +What: /sys/class/net//statistics/tx_carrier_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -152,7 +152,7 @@ Description: because of carrier errors (e.g: physical link down). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/tx_compressed +What: /sys/class/net//statistics/tx_compressed Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -161,7 +161,7 @@ Description: this might only be relevant for devices that support compression (e.g: PPP). -What: /sys/class//statistics/tx_dropped +What: /sys/class/net//statistics/tx_dropped Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -170,7 +170,7 @@ Description: See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_errors +What: /sys/class/net//statistics/tx_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -179,7 +179,7 @@ Description: a network device. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_fifo_errors +What: /sys/class/net//statistics/tx_fifo_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -188,7 +188,7 @@ Description: FIFO error. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_heartbeat_errors +What: /sys/class/net//statistics/tx_heartbeat_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -197,7 +197,7 @@ Description: reported as heartbeat errors. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_packets +What: /sys/class/net//statistics/tx_packets Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -206,7 +206,7 @@ Description: device. See the driver for whether this reports the number of all attempted or successful transmissions. -What: /sys/class//statistics/tx_window_errors +What: /sys/class/net//statistics/tx_window_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 13c01b641dc7..78c26280c473 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -519,6 +519,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/mds /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling /sys/devices/system/cpu/vulnerabilities/retbleed /sys/devices/system/cpu/vulnerabilities/spec_store_bypass /sys/devices/system/cpu/vulnerabilities/spectre_v1 diff --git a/Documentation/admin-guide/abi-obsolete.rst b/Documentation/admin-guide/abi-obsolete.rst index d095867899c5..594e697aa1b2 100644 --- a/Documentation/admin-guide/abi-obsolete.rst +++ b/Documentation/admin-guide/abi-obsolete.rst @@ -7,5 +7,5 @@ marked to be removed at some later point in time. The description of the interface will document the reason why it is obsolete and when it can be expected to be removed. -.. kernel-abi:: $srctree/Documentation/ABI/obsolete +.. kernel-abi:: ABI/obsolete :rst: diff --git a/Documentation/admin-guide/abi-removed.rst b/Documentation/admin-guide/abi-removed.rst index f7e9e43023c1..f9e000c81828 100644 --- a/Documentation/admin-guide/abi-removed.rst +++ b/Documentation/admin-guide/abi-removed.rst @@ -1,5 +1,5 @@ ABI removed symbols =================== -.. kernel-abi:: $srctree/Documentation/ABI/removed +.. kernel-abi:: ABI/removed :rst: diff --git a/Documentation/admin-guide/abi-stable.rst b/Documentation/admin-guide/abi-stable.rst index 70490736e0d3..fc3361d847b1 100644 --- a/Documentation/admin-guide/abi-stable.rst +++ b/Documentation/admin-guide/abi-stable.rst @@ -10,5 +10,5 @@ for at least 2 years. Most interfaces (like syscalls) are expected to never change and always be available. -.. kernel-abi:: $srctree/Documentation/ABI/stable +.. kernel-abi:: ABI/stable :rst: diff --git a/Documentation/admin-guide/abi-testing.rst b/Documentation/admin-guide/abi-testing.rst index b205b16a72d0..19767926b344 100644 --- a/Documentation/admin-guide/abi-testing.rst +++ b/Documentation/admin-guide/abi-testing.rst @@ -16,5 +16,5 @@ Programs that use these interfaces are strongly encouraged to add their name to the description of these interfaces, so that the kernel developers can easily notify them if any changes occur. -.. kernel-abi:: $srctree/Documentation/ABI/testing +.. kernel-abi:: ABI/testing :rst: diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 6828102baaa7..3e4a14e38b49 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -21,3 +21,4 @@ are configurable at compile, boot or run time. cross-thread-rsb.rst gather_data_sampling.rst srso + reg-file-data-sampling diff --git a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst new file mode 100644 index 000000000000..0585d02b9a6c --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst @@ -0,0 +1,104 @@ +================================== +Register File Data Sampling (RFDS) +================================== + +Register File Data Sampling (RFDS) is a microarchitectural vulnerability that +only affects Intel Atom parts(also branded as E-cores). RFDS may allow +a malicious actor to infer data values previously used in floating point +registers, vector registers, or integer registers. RFDS does not provide the +ability to choose which data is inferred. CVE-2023-28746 is assigned to RFDS. + +Affected Processors +=================== +Below is the list of affected Intel processors [#f1]_: + + =================== ============ + Common name Family_Model + =================== ============ + ATOM_GOLDMONT 06_5CH + ATOM_GOLDMONT_D 06_5FH + ATOM_GOLDMONT_PLUS 06_7AH + ATOM_TREMONT_D 06_86H + ATOM_TREMONT 06_96H + ALDERLAKE 06_97H + ALDERLAKE_L 06_9AH + ATOM_TREMONT_L 06_9CH + RAPTORLAKE 06_B7H + RAPTORLAKE_P 06_BAH + ATOM_GRACEMONT 06_BEH + RAPTORLAKE_S 06_BFH + =================== ============ + +As an exception to this table, Intel Xeon E family parts ALDERLAKE(06_97H) and +RAPTORLAKE(06_B7H) codenamed Catlow are not affected. They are reported as +vulnerable in Linux because they share the same family/model with an affected +part. Unlike their affected counterparts, they do not enumerate RFDS_CLEAR or +CPUID.HYBRID. This information could be used to distinguish between the +affected and unaffected parts, but it is deemed not worth adding complexity as +the reporting is fixed automatically when these parts enumerate RFDS_NO. + +Mitigation +========== +Intel released a microcode update that enables software to clear sensitive +information using the VERW instruction. Like MDS, RFDS deploys the same +mitigation strategy to force the CPU to clear the affected buffers before an +attacker can extract the secrets. This is achieved by using the otherwise +unused and obsolete VERW instruction in combination with a microcode update. +The microcode clears the affected CPU buffers when the VERW instruction is +executed. + +Mitigation points +----------------- +VERW is executed by the kernel before returning to user space, and by KVM +before VMentry. None of the affected cores support SMT, so VERW is not required +at C-state transitions. + +New bits in IA32_ARCH_CAPABILITIES +---------------------------------- +Newer processors and microcode update on existing affected processors added new +bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate +vulnerability and mitigation capability: + +- Bit 27 - RFDS_NO - When set, processor is not affected by RFDS. +- Bit 28 - RFDS_CLEAR - When set, processor is affected by RFDS, and has the + microcode that clears the affected buffers on VERW execution. + +Mitigation control on the kernel command line +--------------------------------------------- +The kernel command line allows to control RFDS mitigation at boot time with the +parameter "reg_file_data_sampling=". The valid arguments are: + + ========== ================================================================= + on If the CPU is vulnerable, enable mitigation; CPU buffer clearing + on exit to userspace and before entering a VM. + off Disables mitigation. + ========== ================================================================= + +Mitigation default is selected by CONFIG_MITIGATION_RFDS. + +Mitigation status information +----------------------------- +The Linux kernel provides a sysfs interface to enumerate the current +vulnerability status of the system: whether the system is vulnerable, and +which mitigations are active. The relevant sysfs file is: + + /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling + +The possible values in this file are: + + .. list-table:: + + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: No microcode' + - The processor is vulnerable but microcode is not updated. + * - 'Mitigation: Clear Register File' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. + +References +---------- +.. [#f1] Affected Processors + https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index a39bbfe9526b..32a8893e5617 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -484,11 +484,14 @@ Spectre variant 2 Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at boot, by setting the IBRS bit, and they're automatically protected against - Spectre v2 variant attacks, including cross-thread branch target injections - on SMT systems (STIBP). In other words, eIBRS enables STIBP too. + Spectre v2 variant attacks. - Legacy IBRS systems clear the IBRS bit on exit to userspace and - therefore explicitly enable STIBP for that + On Intel's enhanced IBRS systems, this includes cross-thread branch target + injections on SMT systems (STIBP). In other words, Intel eIBRS enables + STIBP, too. + + AMD Automatic IBRS does not protect userspace, and Legacy IBRS systems clear + the IBRS bit on exit to userspace, therefore both explicitly enable STIBP. The retpoline mitigation is turned on by default on vulnerable CPUs. It can be forced on or off by the administrator @@ -621,9 +624,9 @@ kernel command line. retpoline,generic Retpolines retpoline,lfence LFENCE; indirect branch retpoline,amd alias for retpoline,lfence - eibrs enhanced IBRS - eibrs,retpoline enhanced IBRS + Retpolines - eibrs,lfence enhanced IBRS + LFENCE + eibrs Enhanced/Auto IBRS + eibrs,retpoline Enhanced/Auto IBRS + Retpolines + eibrs,lfence Enhanced/Auto IBRS + LFENCE ibrs use IBRS to protect kernel Not specifying this option is equivalent to diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4ad60e127e04..88dffaf8f0a9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1107,6 +1107,26 @@ The filter can be disabled or changed to another driver later using sysfs. + reg_file_data_sampling= + [X86] Controls mitigation for Register File Data + Sampling (RFDS) vulnerability. RFDS is a CPU + vulnerability which may allow userspace to infer + kernel data values previously stored in floating point + registers, vector registers, or integer registers. + RFDS only affects Intel Atom processors. + + on: Turns ON the mitigation. + off: Turns OFF the mitigation. + + This parameter overrides the compile time default set + by CONFIG_MITIGATION_RFDS. Mitigation cannot be + disabled when other VERW based mitigations (like MDS) + are enabled. In order to disable RFDS mitigation all + VERW based mitigations need to be disabled. + + For details see: + Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst + driver_async_probe= [KNL] List of driver names to be probed asynchronously. * matches with all driver names. If * is specified, the @@ -3186,9 +3206,7 @@ mem_encrypt= [X86-64] AMD Secure Memory Encryption (SME) control Valid arguments: on, off - Default (depends on kernel configuration option): - on (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) - off (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=n) + Default: off mem_encrypt=on: Activate SME mem_encrypt=off: Do not activate SME @@ -3262,6 +3280,7 @@ nospectre_bhb [ARM64] nospectre_v1 [X86,PPC] nospectre_v2 [X86,PPC,S390,ARM64] + reg_file_data_sampling=off [X86] retbleed=off [X86] spec_store_bypass_disable=off [X86,PPC] spectre_v2_user=off [X86] @@ -5744,9 +5763,9 @@ retpoline,generic - Retpolines retpoline,lfence - LFENCE; indirect branch retpoline,amd - alias for retpoline,lfence - eibrs - enhanced IBRS - eibrs,retpoline - enhanced IBRS + Retpolines - eibrs,lfence - enhanced IBRS + LFENCE + eibrs - Enhanced/Auto IBRS + eibrs,retpoline - Enhanced/Auto IBRS + Retpolines + eibrs,lfence - Enhanced/Auto IBRS + LFENCE ibrs - use IBRS to protect kernel Not specifying this option is equivalent to diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index d9fce65b2f04..27135b9c07ac 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -221,3 +221,10 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | +----------------+-----------------+-----------------+-----------------------------+ ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2139208 | ARM64_ERRATUM_2139208 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2067961 | ARM64_ERRATUM_2067961 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 | ++----------------+-----------------+-----------------+-----------------------------+ diff --git a/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt b/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt index 9bf9bbac16e2..cdc303caf5f4 100644 --- a/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt +++ b/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt @@ -1,4 +1,4 @@ -Marvell 8787/8897/8997 (sd8787/sd8897/sd8997/pcie8997) SDIO/PCIE devices +Marvell 8787/8897/8978/8997 (sd8787/sd8897/sd8978/sd8997/pcie8997) SDIO/PCIE devices ------ This node provides properties for controlling the Marvell SDIO/PCIE wireless device. @@ -10,7 +10,9 @@ Required properties: - compatible : should be one of the following: * "marvell,sd8787" * "marvell,sd8897" + * "marvell,sd8978" * "marvell,sd8997" + * "nxp,iw416" * "pci11ab,2b42" * "pci1b4b,2b42" diff --git a/Documentation/filesystems/directory-locking.rst b/Documentation/filesystems/directory-locking.rst index dccd61c7c5c3..193c22687851 100644 --- a/Documentation/filesystems/directory-locking.rst +++ b/Documentation/filesystems/directory-locking.rst @@ -22,13 +22,16 @@ exclusive. 3) object removal. Locking rules: caller locks parent, finds victim, locks victim and calls the method. Locks are exclusive. -4) rename() that is _not_ cross-directory. Locking rules: caller locks the -parent and finds source and target. We lock both (provided they exist). If we -need to lock two inodes of different type (dir vs non-dir), we lock directory -first. If we need to lock two inodes of the same type, lock them in inode -pointer order. Then call the method. All locks are exclusive. -NB: we might get away with locking the source (and target in exchange -case) shared. +4) rename() that is _not_ cross-directory. Locking rules: caller locks +the parent and finds source and target. Then we decide which of the +source and target need to be locked. Source needs to be locked if it's a +non-directory; target - if it's a non-directory or about to be removed. +Take the locks that need to be taken, in inode pointer order if need +to take both (that can happen only when both source and target are +non-directories - the source because it wouldn't be locked otherwise +and the target because mixing directory and non-directory is allowed +only with RENAME_EXCHANGE, and that won't be removing the target). +After the locks had been taken, call the method. All locks are exclusive. 5) link creation. Locking rules: @@ -44,20 +47,17 @@ rules: * lock the filesystem * lock parents in "ancestors first" order. If one is not ancestor of - the other, lock them in inode pointer order. + the other, lock the parent of source first. * find source and target. * if old parent is equal to or is a descendent of target fail with -ENOTEMPTY * if new parent is equal to or is a descendent of source fail with -ELOOP - * Lock both the source and the target provided they exist. If we - need to lock two inodes of different type (dir vs non-dir), we lock - the directory first. If we need to lock two inodes of the same type, - lock them in inode pointer order. + * Lock subdirectories involved (source before target). + * Lock non-directories involved, in inode pointer order. * call the method. -All ->i_rwsem are taken exclusive. Again, we might get away with locking -the source (and target in exchange case) shared. +All ->i_rwsem are taken exclusive. The rules above obviously guarantee that all directories that are going to be read, modified or removed by method will be locked by caller. @@ -67,6 +67,7 @@ If no directory is its own ancestor, the scheme above is deadlock-free. Proof: +[XXX: will be updated once we are done massaging the lock_rename()] First of all, at any moment we have a linear ordering of the objects - A < B iff (A is an ancestor of B) or (B is not an ancestor of A and ptr(A) < ptr(B)). diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 8f737e76935c..555215c416d9 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -99,7 +99,7 @@ symlink: exclusive mkdir: exclusive unlink: exclusive (both) rmdir: exclusive (both)(see below) -rename: exclusive (all) (see below) +rename: exclusive (both parents, some children) (see below) readlink: no get_link: no setattr: exclusive @@ -119,6 +119,9 @@ fileattr_set: exclusive Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem exclusive on victim. cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. + ->unlink() and ->rename() have ->i_rwsem exclusive on all non-directories + involved. + ->rename() has ->i_rwsem exclusive on any subdirectory that changes parent. See Documentation/filesystems/directory-locking.rst for more detailed discussion of the locking scheme for directory operations. diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index df0dc37e6f58..c6aaf340f179 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -943,3 +943,21 @@ file pointer instead of struct dentry pointer. d_tmpfile() is similarly changed to simplify callers. The passed file is in a non-open state and on success must be opened before returning (e.g. by calling finish_open_simple()). + +--- + +**mandatory** + +If ->rename() update of .. on cross-directory move needs an exclusion with +directory modifications, do *not* lock the subdirectory in question in your +->rename() - it's done by the caller now [that item should've been added in +28eceeda130f "fs: Lock moved directories"]. + +--- + +**mandatory** + +On same-directory ->rename() the (tautological) update of .. is not protected +by any locks; just don't do it if the old parent is the same as the new one. +We really can't lock two subdirectories in same-directory rename - not without +deadlocks. diff --git a/Documentation/sound/soc/dapm.rst b/Documentation/sound/soc/dapm.rst index 8e44107933ab..c3154ce6e1b2 100644 --- a/Documentation/sound/soc/dapm.rst +++ b/Documentation/sound/soc/dapm.rst @@ -234,7 +234,7 @@ corresponding soft power control. In this case it is necessary to create a virtual widget - a widget with no control bits e.g. :: - SND_SOC_DAPM_MIXER("AC97 Mixer", SND_SOC_DAPM_NOPM, 0, 0, NULL, 0), + SND_SOC_DAPM_MIXER("AC97 Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), This can be used to merge to signal paths together in software. diff --git a/Documentation/sphinx/kernel_abi.py b/Documentation/sphinx/kernel_abi.py index b5feb5b1d905..6d8a637ad566 100644 --- a/Documentation/sphinx/kernel_abi.py +++ b/Documentation/sphinx/kernel_abi.py @@ -39,8 +39,6 @@ import sys import re import kernellog -from os import path - from docutils import nodes, statemachine from docutils.statemachine import ViewList from docutils.parsers.rst import directives, Directive @@ -73,60 +71,26 @@ class KernelCmd(Directive): } def run(self): - doc = self.state.document if not doc.settings.file_insertion_enabled: raise self.warning("docutils: file insertion disabled") - env = doc.settings.env - cwd = path.dirname(doc.current_source) - cmd = "get_abi.pl rest --enable-lineno --dir " - cmd += self.arguments[0] + srctree = os.path.abspath(os.environ["srctree"]) + + args = [ + os.path.join(srctree, 'scripts/get_abi.pl'), + 'rest', + '--enable-lineno', + '--dir', os.path.join(srctree, 'Documentation', self.arguments[0]), + ] if 'rst' in self.options: - cmd += " --rst-source" + args.append('--rst-source') - srctree = path.abspath(os.environ["srctree"]) - - fname = cmd - - # extend PATH with $(srctree)/scripts - path_env = os.pathsep.join([ - srctree + os.sep + "scripts", - os.environ["PATH"] - ]) - shell_env = os.environ.copy() - shell_env["PATH"] = path_env - shell_env["srctree"] = srctree - - lines = self.runCmd(cmd, shell=True, cwd=cwd, env=shell_env) + lines = subprocess.check_output(args, cwd=os.path.dirname(doc.current_source)).decode('utf-8') nodeList = self.nestedParse(lines, self.arguments[0]) return nodeList - def runCmd(self, cmd, **kwargs): - u"""Run command ``cmd`` and return its stdout as unicode.""" - - try: - proc = subprocess.Popen( - cmd - , stdout = subprocess.PIPE - , stderr = subprocess.PIPE - , **kwargs - ) - out, err = proc.communicate() - - out, err = codecs.decode(out, 'utf-8'), codecs.decode(err, 'utf-8') - - if proc.returncode != 0: - raise self.severe( - u"command '%s' failed with return code %d" - % (cmd, proc.returncode) - ) - except OSError as exc: - raise self.severe(u"problems with '%s' directive: %s." - % (self.name, ErrorString(exc))) - return out - def nestedParse(self, lines, fname): env = self.state.document.settings.env content = ViewList() diff --git a/Documentation/userspace-api/media/mediactl/media-types.rst b/Documentation/userspace-api/media/mediactl/media-types.rst index 0ffeece1e0c8..6332e8395263 100644 --- a/Documentation/userspace-api/media/mediactl/media-types.rst +++ b/Documentation/userspace-api/media/mediactl/media-types.rst @@ -375,12 +375,11 @@ Types and flags used to represent the media graph elements are origins of links. * - ``MEDIA_PAD_FL_MUST_CONNECT`` - - If this flag is set and the pad is linked to any other pad, then - at least one of those links must be enabled for the entity to be - able to stream. There could be temporary reasons (e.g. device - configuration dependent) for the pad to need enabled links even - when this flag isn't set; the absence of the flag doesn't imply - there is none. + - If this flag is set, then for this pad to be able to stream, it must + be connected by at least one enabled link. There could be temporary + reasons (e.g. device configuration dependent) for the pad to need + enabled links even when this flag isn't set; the absence of the flag + doesn't imply there is none. One and only one of ``MEDIA_PAD_FL_SINK`` and ``MEDIA_PAD_FL_SOURCE`` diff --git a/Documentation/x86/amd-memory-encryption.rst b/Documentation/x86/amd-memory-encryption.rst index 934310ce7258..bace87cc9ca2 100644 --- a/Documentation/x86/amd-memory-encryption.rst +++ b/Documentation/x86/amd-memory-encryption.rst @@ -87,14 +87,14 @@ The state of SME in the Linux kernel can be documented as follows: kernel is non-zero). SME can also be enabled and activated in the BIOS. If SME is enabled and -activated in the BIOS, then all memory accesses will be encrypted and it will -not be necessary to activate the Linux memory encryption support. If the BIOS -merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), then Linux can activate -memory encryption by default (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) or -by supplying mem_encrypt=on on the kernel command line. However, if BIOS does -not enable SME, then Linux will not be able to activate memory encryption, even -if configured to do so by default or the mem_encrypt=on command line parameter -is specified. +activated in the BIOS, then all memory accesses will be encrypted and it +will not be necessary to activate the Linux memory encryption support. + +If the BIOS merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), +then memory encryption can be enabled by supplying mem_encrypt=on on the +kernel command line. However, if BIOS does not enable SME, then Linux +will not be able to activate memory encryption, even if configured to do +so by default or the mem_encrypt=on command line parameter is specified. Secure Nested Paging (SNP) ========================== diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst index 894a19897005..bac3789f3e8f 100644 --- a/Documentation/x86/boot.rst +++ b/Documentation/x86/boot.rst @@ -1416,7 +1416,7 @@ execution context provided by the EFI firmware. The function prototype for the handover entry point looks like this:: - efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp) + efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp) 'handle' is the EFI image handle passed to the boot loader by the EFI firmware, 'table' is the EFI system table - these are the first two diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 5d4330be200f..e801df0bb3a8 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing: mds_clear_cpu_buffers() +Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. +Other than CFLAGS.ZF, this macro doesn't clobber any registers. + The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state (idle) transitions. @@ -138,17 +141,30 @@ Mitigation points When transitioning from kernel to user space the CPU buffers are flushed on affected CPUs when the mitigation is not disabled on the kernel - command line. The migitation is enabled through the static key - mds_user_clear. + command line. The mitigation is enabled through the feature flag + X86_FEATURE_CLEAR_CPU_BUF. - The mitigation is invoked in prepare_exit_to_usermode() which covers - all but one of the kernel to user space transitions. The exception - is when we return from a Non Maskable Interrupt (NMI), which is - handled directly in do_nmi(). + The mitigation is invoked just before transitioning to userspace after + user registers are restored. This is done to minimize the window in + which kernel data could be accessed after VERW e.g. via an NMI after + VERW. - (The reason that NMI is special is that prepare_exit_to_usermode() can - enable IRQs. In NMI context, NMIs are blocked, and we don't want to - enable IRQs with NMIs blocked.) + **Corner case not handled** + Interrupts returning to kernel don't clear CPUs buffers since the + exit-to-user path is expected to do that anyways. But, there could be + a case when an NMI is generated in kernel after the exit-to-user path + has cleared the buffers. This case is not handled and NMI returning to + kernel don't clear CPU buffers because: + + 1. It is rare to get an NMI after VERW, but before returning to userspace. + 2. For an unprivileged user, there is no known way to make that NMI + less rare or target it. + 3. It would take a large number of these precisely-timed NMIs to mount + an actual attack. There's presumably not enough bandwidth. + 4. The NMI in question occurs after a VERW, i.e. when user state is + restored and most interesting data is already scrubbed. Whats left + is only the data that NMI touches, and that may or may not be of + any interest. 2. C-State transition diff --git a/MAINTAINERS b/MAINTAINERS index 13d1078808bb..bbfedb0b2093 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10051,6 +10051,7 @@ F: drivers/infiniband/ F: include/rdma/ F: include/trace/events/ib_mad.h F: include/trace/events/ib_umad.h +F: include/trace/misc/rdma.h F: include/uapi/linux/if_infiniband.h F: include/uapi/rdma/ F: samples/bpf/ibumad_kern.c @@ -11139,6 +11140,12 @@ F: fs/nfs_common/ F: fs/nfsd/ F: include/linux/lockd/ F: include/linux/sunrpc/ +F: include/trace/events/rpcgss.h +F: include/trace/events/rpcrdma.h +F: include/trace/events/sunrpc.h +F: include/trace/misc/fs.h +F: include/trace/misc/nfs.h +F: include/trace/misc/sunrpc.h F: include/uapi/linux/nfsd/ F: include/uapi/linux/sunrpc/ F: net/sunrpc/ diff --git a/Makefile b/Makefile index 7cd49d9eadbf..0e33150db2bf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 75 +SUBLEVEL = 84 EXTRAVERSION = NAME = Curry Ramen @@ -459,8 +459,7 @@ HOSTRUSTC = rustc HOSTPKG_CONFIG = pkg-config KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \ - -O2 -fomit-frame-pointer -std=gnu11 \ - -Wdeclaration-after-statement + -O2 -fomit-frame-pointer -std=gnu11 KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS) KBUILD_USERLDFLAGS := $(USERLDFLAGS) @@ -1018,9 +1017,6 @@ endif # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -# warn about C99 declaration after statement -KBUILD_CFLAGS += -Wdeclaration-after-statement - # Variable Length Arrays (VLAs) should not be used anywhere in the kernel KBUILD_CFLAGS += -Wvla diff --git a/arch/Kconfig b/arch/Kconfig index 14273a6203df..f99fd9a4ca77 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -642,6 +642,7 @@ config SHADOW_CALL_STACK bool "Shadow Call Stack" depends on ARCH_SUPPORTS_SHADOW_CALL_STACK depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER + depends on MMU help This option enables the compiler's Shadow Call Stack, which uses a shadow stack to protect function return addresses from diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c index fb3025396ac9..cfdf90bc8b3f 100644 --- a/arch/alpha/kernel/rtc.c +++ b/arch/alpha/kernel/rtc.c @@ -80,7 +80,7 @@ init_rtc_epoch(void) static int alpha_rtc_read_time(struct device *dev, struct rtc_time *tm) { - int ret = mc146818_get_time(tm); + int ret = mc146818_get_time(tm, 10); if (ret < 0) { dev_err_ratelimited(dev, "unable to read current time\n"); diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h index 9d9618079739..a339223d9e05 100644 --- a/arch/arc/include/asm/jump_label.h +++ b/arch/arc/include/asm/jump_label.h @@ -31,7 +31,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" "1: \n" "nop \n" ".pushsection __jump_table, \"aw\" \n" @@ -47,7 +47,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" "1: \n" "b %l[l_yes] \n" ".pushsection __jump_table, \"aw\" \n" diff --git a/arch/arm/boot/dts/arm-realview-pb1176.dts b/arch/arm/boot/dts/arm-realview-pb1176.dts index efed325af88d..d99bac02232b 100644 --- a/arch/arm/boot/dts/arm-realview-pb1176.dts +++ b/arch/arm/boot/dts/arm-realview-pb1176.dts @@ -451,7 +451,7 @@ /* Direct-mapped development chip ROM */ pb1176_rom@10200000 { - compatible = "direct-mapped"; + compatible = "mtd-rom"; reg = <0x10200000 0x4000>; bank-width = <1>; }; diff --git a/arch/arm/boot/dts/exynos4210-i9100.dts b/arch/arm/boot/dts/exynos4210-i9100.dts index 53e023fc1cac..b4c7b642e456 100644 --- a/arch/arm/boot/dts/exynos4210-i9100.dts +++ b/arch/arm/boot/dts/exynos4210-i9100.dts @@ -521,6 +521,14 @@ regulator-name = "VT_CAM_1.8V"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + + /* + * Force-enable this regulator; otherwise the + * kernel hangs very early in the boot process + * for about 12 seconds, without apparent + * reason. + */ + regulator-always-on; }; vcclcd_reg: LDO13 { diff --git a/arch/arm/boot/dts/imx1-ads.dts b/arch/arm/boot/dts/imx1-ads.dts index 5833fb6f15d8..2c817c4a4c68 100644 --- a/arch/arm/boot/dts/imx1-ads.dts +++ b/arch/arm/boot/dts/imx1-ads.dts @@ -65,7 +65,7 @@ pinctrl-0 = <&pinctrl_weim>; status = "okay"; - nor: nor@0,0 { + nor: flash@0,0 { compatible = "cfi-flash"; reg = <0 0x00000000 0x02000000>; bank-width = <4>; diff --git a/arch/arm/boot/dts/imx1-apf9328.dts b/arch/arm/boot/dts/imx1-apf9328.dts index 77b21aa7a146..27e72b07b517 100644 --- a/arch/arm/boot/dts/imx1-apf9328.dts +++ b/arch/arm/boot/dts/imx1-apf9328.dts @@ -45,7 +45,7 @@ pinctrl-0 = <&pinctrl_weim>; status = "okay"; - nor: nor@0,0 { + nor: flash@0,0 { compatible = "cfi-flash"; reg = <0 0x00000000 0x02000000>; bank-width = <2>; diff --git a/arch/arm/boot/dts/imx1.dtsi b/arch/arm/boot/dts/imx1.dtsi index e312f1e74e2f..4aeb74479f44 100644 --- a/arch/arm/boot/dts/imx1.dtsi +++ b/arch/arm/boot/dts/imx1.dtsi @@ -268,9 +268,12 @@ status = "disabled"; }; - esram: esram@300000 { + esram: sram@300000 { compatible = "mmio-sram"; reg = <0x00300000 0x20000>; + ranges = <0 0x00300000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; }; }; }; diff --git a/arch/arm/boot/dts/imx23-sansa.dts b/arch/arm/boot/dts/imx23-sansa.dts index 46057d9bf555..c2efcc20ae80 100644 --- a/arch/arm/boot/dts/imx23-sansa.dts +++ b/arch/arm/boot/dts/imx23-sansa.dts @@ -175,10 +175,8 @@ #address-cells = <1>; #size-cells = <0>; compatible = "i2c-gpio"; - gpios = < - &gpio1 24 0 /* SDA */ - &gpio1 22 0 /* SCL */ - >; + sda-gpios = <&gpio1 24 0>; + scl-gpios = <&gpio1 22 0>; i2c-gpio,delay-us = <2>; /* ~100 kHz */ }; @@ -186,10 +184,8 @@ #address-cells = <1>; #size-cells = <0>; compatible = "i2c-gpio"; - gpios = < - &gpio0 31 0 /* SDA */ - &gpio0 30 0 /* SCL */ - >; + sda-gpios = <&gpio0 31 0>; + scl-gpios = <&gpio0 30 0>; i2c-gpio,delay-us = <2>; /* ~100 kHz */ touch: touch@20 { diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi index 7f4c602454a5..b236d23f8071 100644 --- a/arch/arm/boot/dts/imx23.dtsi +++ b/arch/arm/boot/dts/imx23.dtsi @@ -59,7 +59,7 @@ reg = <0x80000000 0x2000>; }; - dma_apbh: dma-apbh@80004000 { + dma_apbh: dma-controller@80004000 { compatible = "fsl,imx23-dma-apbh"; reg = <0x80004000 0x2000>; interrupts = <0 14 20 0 @@ -414,7 +414,7 @@ status = "disabled"; }; - dma_apbx: dma-apbx@80024000 { + dma_apbx: dma-controller@80024000 { compatible = "fsl,imx23-dma-apbx"; reg = <0x80024000 0x2000>; interrupts = <7 5 9 26 diff --git a/arch/arm/boot/dts/imx25-eukrea-cpuimx25.dtsi b/arch/arm/boot/dts/imx25-eukrea-cpuimx25.dtsi index 0703f62d10d1..93a6e4e680b4 100644 --- a/arch/arm/boot/dts/imx25-eukrea-cpuimx25.dtsi +++ b/arch/arm/boot/dts/imx25-eukrea-cpuimx25.dtsi @@ -27,7 +27,7 @@ pinctrl-0 = <&pinctrl_i2c1>; status = "okay"; - pcf8563@51 { + rtc@51 { compatible = "nxp,pcf8563"; reg = <0x51>; }; diff --git a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-cmo-qvga.dts b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-cmo-qvga.dts index 7d4301b22b90..1ed3fb7b9ce6 100644 --- a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-cmo-qvga.dts +++ b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-cmo-qvga.dts @@ -16,7 +16,7 @@ bus-width = <18>; display-timings { native-mode = <&qvga_timings>; - qvga_timings: 320x240 { + qvga_timings: timing0 { clock-frequency = <6500000>; hactive = <320>; vactive = <240>; diff --git a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-svga.dts b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-svga.dts index 80a7f96de4c6..64b2ffac463b 100644 --- a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-svga.dts +++ b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-svga.dts @@ -16,7 +16,7 @@ bus-width = <18>; display-timings { native-mode = <&dvi_svga_timings>; - dvi_svga_timings: 800x600 { + dvi_svga_timings: timing0 { clock-frequency = <40000000>; hactive = <800>; vactive = <600>; diff --git a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dts b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dts index 24027a1fb46d..fb074bfdaa8d 100644 --- a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dts +++ b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dts @@ -16,7 +16,7 @@ bus-width = <18>; display-timings { native-mode = <&dvi_vga_timings>; - dvi_vga_timings: 640x480 { + dvi_vga_timings: timing0 { clock-frequency = <31250000>; hactive = <640>; vactive = <480>; diff --git a/arch/arm/boot/dts/imx25-pdk.dts b/arch/arm/boot/dts/imx25-pdk.dts index fb66884d8a2f..59b40d13a640 100644 --- a/arch/arm/boot/dts/imx25-pdk.dts +++ b/arch/arm/boot/dts/imx25-pdk.dts @@ -78,7 +78,7 @@ bus-width = <18>; display-timings { native-mode = <&wvga_timings>; - wvga_timings: 640x480 { + wvga_timings: timing0 { hactive = <640>; vactive = <480>; hback-porch = <45>; diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi index 5f90d72b840b..5ac4549286bd 100644 --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@ -543,7 +543,7 @@ }; iim: efuse@53ff0000 { - compatible = "fsl,imx25-iim", "fsl,imx27-iim"; + compatible = "fsl,imx25-iim"; reg = <0x53ff0000 0x4000>; interrupts = <19>; clocks = <&clks 99>; diff --git a/arch/arm/boot/dts/imx27-apf27dev.dts b/arch/arm/boot/dts/imx27-apf27dev.dts index 6f1e8ce9e76e..3d9bb7fc3be2 100644 --- a/arch/arm/boot/dts/imx27-apf27dev.dts +++ b/arch/arm/boot/dts/imx27-apf27dev.dts @@ -16,7 +16,7 @@ fsl,pcr = <0xfae80083>; /* non-standard but required */ display-timings { native-mode = <&timing0>; - timing0: 800x480 { + timing0: timing0 { clock-frequency = <33000033>; hactive = <800>; vactive = <480>; @@ -47,7 +47,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpio_leds>; - user { + led-user { label = "Heartbeat"; gpios = <&gpio6 14 GPIO_ACTIVE_HIGH>; linux,default-trigger = "heartbeat"; diff --git a/arch/arm/boot/dts/imx27-eukrea-cpuimx27.dtsi b/arch/arm/boot/dts/imx27-eukrea-cpuimx27.dtsi index 74110bbcd9d4..c7e923584878 100644 --- a/arch/arm/boot/dts/imx27-eukrea-cpuimx27.dtsi +++ b/arch/arm/boot/dts/imx27-eukrea-cpuimx27.dtsi @@ -33,7 +33,7 @@ pinctrl-0 = <&pinctrl_i2c1>; status = "okay"; - pcf8563@51 { + rtc@51 { compatible = "nxp,pcf8563"; reg = <0x51>; }; @@ -90,7 +90,7 @@ &weim { status = "okay"; - nor: nor@0,0 { + nor: flash@0,0 { #address-cells = <1>; #size-cells = <1>; compatible = "cfi-flash"; diff --git a/arch/arm/boot/dts/imx27-eukrea-mbimxsd27-baseboard.dts b/arch/arm/boot/dts/imx27-eukrea-mbimxsd27-baseboard.dts index 9c3ec82ec7e5..50fa0bd4c8a1 100644 --- a/arch/arm/boot/dts/imx27-eukrea-mbimxsd27-baseboard.dts +++ b/arch/arm/boot/dts/imx27-eukrea-mbimxsd27-baseboard.dts @@ -16,7 +16,7 @@ display-timings { native-mode = <&timing0>; - timing0: 320x240 { + timing0: timing0 { clock-frequency = <6500000>; hactive = <320>; vactive = <240>; diff --git a/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts b/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts index 188639738dc3..7f36af150a25 100644 --- a/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts +++ b/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts @@ -19,7 +19,7 @@ fsl,pcr = <0xf0c88080>; /* non-standard but required */ display-timings { native-mode = <&timing0>; - timing0: 640x480 { + timing0: timing0 { hactive = <640>; vactive = <480>; hback-porch = <112>; diff --git a/arch/arm/boot/dts/imx27-phytec-phycore-rdk.dts b/arch/arm/boot/dts/imx27-phytec-phycore-rdk.dts index 344e77790152..d133b9f08b3a 100644 --- a/arch/arm/boot/dts/imx27-phytec-phycore-rdk.dts +++ b/arch/arm/boot/dts/imx27-phytec-phycore-rdk.dts @@ -19,7 +19,7 @@ display-timings { native-mode = <&timing0>; - timing0: 240x320 { + timing0: timing0 { clock-frequency = <5500000>; hactive = <240>; vactive = <320>; diff --git a/arch/arm/boot/dts/imx27-phytec-phycore-som.dtsi b/arch/arm/boot/dts/imx27-phytec-phycore-som.dtsi index 3d10273177e9..a5fdc2fd4ce5 100644 --- a/arch/arm/boot/dts/imx27-phytec-phycore-som.dtsi +++ b/arch/arm/boot/dts/imx27-phytec-phycore-som.dtsi @@ -322,7 +322,7 @@ &weim { status = "okay"; - nor: nor@0,0 { + nor: flash@0,0 { compatible = "cfi-flash"; reg = <0 0x00000000 0x02000000>; bank-width = <2>; diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index e140307be2e7..eb6daf22486e 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -588,6 +588,9 @@ iram: sram@ffff4c00 { compatible = "mmio-sram"; reg = <0xffff4c00 0xb400>; + ranges = <0 0xffff4c00 0xb400>; + #address-cells = <1>; + #size-cells = <1>; }; }; }; diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index 130b4145af82..b81592a61311 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi @@ -78,7 +78,7 @@ status = "disabled"; }; - dma_apbh: dma-apbh@80004000 { + dma_apbh: dma-controller@80004000 { compatible = "fsl,imx28-dma-apbh"; reg = <0x80004000 0x2000>; interrupts = <82 83 84 85 @@ -994,7 +994,7 @@ status = "disabled"; }; - dma_apbx: dma-apbx@80024000 { + dma_apbx: dma-controller@80024000 { compatible = "fsl,imx28-dma-apbx"; reg = <0x80024000 0x2000>; interrupts = <78 79 66 0 diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi index aacbf317feea..4b7aee895892 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi +++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi @@ -106,8 +106,6 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii-id"; - phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; - phy-reset-duration = <20>; phy-supply = <&sw2_reg>; status = "okay"; @@ -120,17 +118,10 @@ #address-cells = <1>; #size-cells = <0>; - phy_port2: phy@1 { - reg = <1>; - }; - - phy_port3: phy@2 { - reg = <2>; - }; - switch@10 { compatible = "qca,qca8334"; - reg = <10>; + reg = <0x10>; + reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; switch_ports: ports { #address-cells = <1>; @@ -151,15 +142,30 @@ eth2: port@2 { reg = <2>; label = "eth2"; + phy-mode = "internal"; phy-handle = <&phy_port2>; }; eth1: port@3 { reg = <3>; label = "eth1"; + phy-mode = "internal"; phy-handle = <&phy_port3>; }; }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + phy_port2: ethernet-phy@1 { + reg = <1>; + }; + + phy_port3: ethernet-phy@2 { + reg = <2>; + }; + }; }; }; }; diff --git a/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts b/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts index f9f7d99bd4db..76f3e07bc882 100644 --- a/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts +++ b/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts @@ -76,6 +76,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enable_can1_power>; regulator-name = "can1_supply"; + startup-delay-us = <1000>; }; reg_can2_supply: regulator-can2-supply { @@ -85,6 +86,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enable_can2_power>; regulator-name = "can2_supply"; + startup-delay-us = <1000>; }; }; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index ff1e0173b39b..2c6eada01d79 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -150,7 +150,7 @@ interrupt-parent = <&gpc>; ranges; - dma_apbh: dma-apbh@110000 { + dma_apbh: dma-controller@110000 { compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x00110000 0x2000>; interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 1f1053a898fb..67d344ae76b5 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -209,7 +209,7 @@ power-domains = <&pd_pu>; }; - dma_apbh: dma-apbh@1804000 { + dma_apbh: dma-controller@1804000 { compatible = "fsl,imx6sx-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x01804000 0x2000>; interrupts = , diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index 2b5996395701..aac081b6daaa 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -164,7 +164,7 @@ <0x00a06000 0x2000>; }; - dma_apbh: dma-apbh@1804000 { + dma_apbh: dma-controller@1804000 { compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x01804000 0x2000>; interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi index 7ceb7c09f7ad..7ef685fdda55 100644 --- a/arch/arm/boot/dts/imx7d.dtsi +++ b/arch/arm/boot/dts/imx7d.dtsi @@ -208,9 +208,6 @@ }; &ca_funnel_in_ports { - #address-cells = <1>; - #size-cells = <0>; - port@1 { reg = <1>; ca_funnel_in_port1: endpoint { diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 45947707134b..69aebc691526 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -190,7 +190,11 @@ clock-names = "apb_pclk"; ca_funnel_in_ports: in-ports { - port { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; ca_funnel_in_port0: endpoint { remote-endpoint = <&etm0_out_port>; }; @@ -814,7 +818,7 @@ }; lcdif: lcdif@30730000 { - compatible = "fsl,imx7d-lcdif", "fsl,imx28-lcdif"; + compatible = "fsl,imx7d-lcdif", "fsl,imx6sx-lcdif"; reg = <0x30730000 0x10000>; interrupts = ; clocks = <&clks IMX7D_LCDIF_PIXEL_ROOT_CLK>, @@ -1263,14 +1267,13 @@ }; }; - dma_apbh: dma-apbh@33000000 { + dma_apbh: dma-controller@33000000 { compatible = "fsl,imx7d-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x33000000 0x2000>; interrupts = , , , ; - interrupt-names = "gpmi0", "gpmi1", "gpmi2", "gpmi3"; #dma-cells = <1>; dma-channels = <4>; clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>; @@ -1279,7 +1282,7 @@ gpmi: nand-controller@33002000{ compatible = "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <1>; + #size-cells = <0>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = ; diff --git a/arch/arm/boot/dts/mmp2-brownstone.dts b/arch/arm/boot/dts/mmp2-brownstone.dts index 04f1ae1382e7..bc64348b8218 100644 --- a/arch/arm/boot/dts/mmp2-brownstone.dts +++ b/arch/arm/boot/dts/mmp2-brownstone.dts @@ -28,7 +28,7 @@ &twsi1 { status = "okay"; pmic: max8925@3c { - compatible = "maxium,max8925"; + compatible = "maxim,max8925"; reg = <0x3c>; interrupts = <1>; interrupt-parent = <&intcmux4>; diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi index c4b2e9ac2494..5ea45e486ed5 100644 --- a/arch/arm/boot/dts/qcom-msm8974.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi @@ -1134,7 +1134,7 @@ qfprom: qfprom@fc4bc000 { compatible = "qcom,msm8974-qfprom", "qcom,qfprom"; - reg = <0xfc4bc000 0x1000>; + reg = <0xfc4bc000 0x2100>; #address-cells = <1>; #size-cells = <1>; tsens_calib: calib@d0 { diff --git a/arch/arm/boot/dts/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom-sdx55.dtsi index a4bf1d5ee206..b85820448b9d 100644 --- a/arch/arm/boot/dts/qcom-sdx55.dtsi +++ b/arch/arm/boot/dts/qcom-sdx55.dtsi @@ -495,10 +495,10 @@ <&gcc GCC_USB30_MASTER_CLK>; assigned-clock-rates = <19200000>, <200000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 51 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 11 IRQ_TYPE_EDGE_BOTH>, + <&pdc 10 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; @@ -522,7 +522,7 @@ compatible = "qcom,sdx55-pdc", "qcom,pdc"; reg = <0x0b210000 0x30000>; qcom,pdc-ranges = <0 179 52>; - #interrupt-cells = <3>; + #interrupt-cells = <2>; interrupt-parent = <&intc>; interrupt-controller; }; diff --git a/arch/arm/boot/dts/r8a73a4-ape6evm.dts b/arch/arm/boot/dts/r8a73a4-ape6evm.dts index e81a7213d304..4282bafbb504 100644 --- a/arch/arm/boot/dts/r8a73a4-ape6evm.dts +++ b/arch/arm/boot/dts/r8a73a4-ape6evm.dts @@ -209,6 +209,18 @@ status = "okay"; }; +&extal1_clk { + clock-frequency = <26000000>; +}; + +&extal2_clk { + clock-frequency = <48000000>; +}; + +&extalr_clk { + clock-frequency = <32768>; +}; + &pfc { scifa0_pins: scifa0 { groups = "scifa0_data"; diff --git a/arch/arm/boot/dts/r8a73a4.dtsi b/arch/arm/boot/dts/r8a73a4.dtsi index c39066967053..d1f4cbd099ef 100644 --- a/arch/arm/boot/dts/r8a73a4.dtsi +++ b/arch/arm/boot/dts/r8a73a4.dtsi @@ -450,17 +450,20 @@ extalr_clk: extalr { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <32768>; + /* This value must be overridden by the board. */ + clock-frequency = <0>; }; extal1_clk: extal1 { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <25000000>; + /* This value must be overridden by the board. */ + clock-frequency = <0>; }; extal2_clk: extal2 { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <48000000>; + /* This value must be overridden by the board. */ + clock-frequency = <0>; }; fsiack_clk: fsiack { compatible = "fixed-clock"; diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index fc3476151bac..9188d91bebfe 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -642,12 +642,20 @@ #sound-dai-cells = <0>; status = "disabled"; - hdmi_in: port { + ports { #address-cells = <1>; #size-cells = <0>; - hdmi_in_vop: endpoint@0 { + + hdmi_in: port@0 { reg = <0>; - remote-endpoint = <&vop_out_hdmi>; + + hdmi_in_vop: endpoint { + remote-endpoint = <&vop_out_hdmi>; + }; + }; + + hdmi_out: port@1 { + reg = <1>; }; }; }; diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c index 433ee4ddce6c..f85933fdec75 100644 --- a/arch/arm/crypto/sha256_glue.c +++ b/arch/arm/crypto/sha256_glue.c @@ -24,8 +24,8 @@ #include "sha256_glue.h" -asmlinkage void sha256_block_data_order(u32 *digest, const void *data, - unsigned int num_blks); +asmlinkage void sha256_block_data_order(struct sha256_state *state, + const u8 *data, int num_blks); int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, unsigned int len) @@ -33,23 +33,20 @@ int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, /* make sure casting to sha256_block_fn() is safe */ BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); - return sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_data_order); + return sha256_base_do_update(desc, data, len, sha256_block_data_order); } EXPORT_SYMBOL(crypto_sha256_arm_update); static int crypto_sha256_arm_final(struct shash_desc *desc, u8 *out) { - sha256_base_do_finalize(desc, - (sha256_block_fn *)sha256_block_data_order); + sha256_base_do_finalize(desc, sha256_block_data_order); return sha256_base_finish(desc, out); } int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_data_order); + sha256_base_do_update(desc, data, len, sha256_block_data_order); return crypto_sha256_arm_final(desc, out); } EXPORT_SYMBOL(crypto_sha256_arm_finup); diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c index 0635a65aa488..1be5bd498af3 100644 --- a/arch/arm/crypto/sha512-glue.c +++ b/arch/arm/crypto/sha512-glue.c @@ -25,27 +25,25 @@ MODULE_ALIAS_CRYPTO("sha512"); MODULE_ALIAS_CRYPTO("sha384-arm"); MODULE_ALIAS_CRYPTO("sha512-arm"); -asmlinkage void sha512_block_data_order(u64 *state, u8 const *src, int blocks); +asmlinkage void sha512_block_data_order(struct sha512_state *state, + u8 const *src, int blocks); int sha512_arm_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_block_data_order); + return sha512_base_do_update(desc, data, len, sha512_block_data_order); } static int sha512_arm_final(struct shash_desc *desc, u8 *out) { - sha512_base_do_finalize(desc, - (sha512_block_fn *)sha512_block_data_order); + sha512_base_do_finalize(desc, sha512_block_data_order); return sha512_base_finish(desc, out); } int sha512_arm_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_block_data_order); + sha512_base_do_update(desc, data, len, sha512_block_data_order); return sha512_arm_final(desc, out); } diff --git a/arch/arm/include/asm/irq_work.h b/arch/arm/include/asm/irq_work.h index 3149e4dc1b54..8895999834cc 100644 --- a/arch/arm/include/asm/irq_work.h +++ b/arch/arm/include/asm/irq_work.h @@ -9,6 +9,4 @@ static inline bool arch_irq_work_has_interrupt(void) return is_smp(); } -extern void arch_irq_work_raise(void); - #endif /* _ASM_ARM_IRQ_WORK_H */ diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index e12d7d096fc0..e4eb54f6cd9f 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -11,7 +11,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" WASM(nop) "\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" @@ -25,7 +25,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" WASM(b) " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 95e731676cea..961daac65326 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -339,6 +339,7 @@ static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = { GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), GPIO_LOOKUP_IDX("G", 0, NULL, 1, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), + { } }, }; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ea70eb960565..c15f71501c6c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -983,8 +983,12 @@ config ARM64_ERRATUM_2457168 If unsure, say Y. +config ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD + bool + config ARM64_ERRATUM_2966298 bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load" + select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD default y help This option adds the workaround for ARM Cortex-A520 erratum 2966298. diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts index 9ec49ac2f6fd..381d58cea092 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts @@ -291,6 +291,8 @@ }; &spdif { + pinctrl-names = "default"; + pinctrl-0 = <&spdif_tx_pin>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi index 4903d6358112..855b7d43bc50 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi @@ -166,6 +166,8 @@ }; &spdif { + pinctrl-names = "default"; + pinctrl-0 = <&spdif_tx_pin>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index ca1d287a0a01..d11e5041bae9 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -406,6 +406,7 @@ function = "spi1"; }; + /omit-if-no-ref/ spdif_tx_pin: spdif-tx-pin { pins = "PH7"; function = "spdif"; @@ -655,10 +656,8 @@ clocks = <&ccu CLK_BUS_SPDIF>, <&ccu CLK_SPDIF>; clock-names = "apb", "spdif"; resets = <&ccu RST_BUS_SPDIF>; - dmas = <&dma 2>; - dma-names = "tx"; - pinctrl-names = "default"; - pinctrl-0 = <&spdif_tx_pin>; + dmas = <&dma 2>, <&dma 2>; + dma-names = "rx", "tx"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi index 4eb2cd14e00b..9b6da84deae7 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi @@ -145,7 +145,6 @@ msix: msix@fbe00000 { compatible = "al,alpine-msix"; reg = <0x0 0xfbe00000 0x0 0x100000>; - interrupt-controller; msi-controller; al,msi-base-spi = <160>; al,msi-num-spis = <160>; diff --git a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi index 73a352ea8fd5..b30014d4dc29 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi @@ -351,7 +351,6 @@ msix: msix@fbe00000 { compatible = "al,alpine-msix"; reg = <0x0 0xfbe00000 0x0 0x100000>; - interrupt-controller; msi-controller; al,msi-base-spi = <336>; al,msi-num-spis = <959>; diff --git a/arch/arm64/boot/dts/amlogic/meson-s4-s805x2-aq222.dts b/arch/arm64/boot/dts/amlogic/meson-s4-s805x2-aq222.dts index 8ffbcb2b1ac5..bbd3c05cbd90 100644 --- a/arch/arm64/boot/dts/amlogic/meson-s4-s805x2-aq222.dts +++ b/arch/arm64/boot/dts/amlogic/meson-s4-s805x2-aq222.dts @@ -15,7 +15,7 @@ #size-cells = <2>; aliases { - serial0 = &uart_B; + serial0 = &uart_b; }; memory@0 { @@ -25,6 +25,6 @@ }; -&uart_B { +&uart_b { status = "okay"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi index ad50cba42d19..372a03762d69 100644 --- a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi @@ -118,14 +118,14 @@ <10 11 12 13 14 15 16 17 18 19 20 21>; }; - uart_B: serial@7a000 { + uart_b: serial@7a000 { compatible = "amlogic,meson-s4-uart", "amlogic,meson-ao-uart"; reg = <0x0 0x7a000 0x0 0x18>; interrupts = ; - status = "disabled"; clocks = <&xtal>, <&xtal>, <&xtal>; clock-names = "xtal", "pclk", "baud"; + status = "disabled"; }; reset: reset-controller@2000 { diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi index df7134854206..a4c5a38905b0 100644 --- a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi @@ -180,9 +180,6 @@ brcm,num-gphy = <5>; brcm,num-rgmii-ports = <2>; - #address-cells = <1>; - #size-cells = <0>; - ports: ports { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index fda97c47f4e9..d5778417455c 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -584,6 +584,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupts = ; }; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi index 8f8c25e51194..473d7d0ddf36 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi @@ -442,6 +442,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupts = ; gpio-ranges = <&pinmux 0 0 16>, <&pinmux 16 71 2>, diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts index 8b16bd68576c..d9fa0deea700 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts @@ -294,8 +294,8 @@ pinctrl_i2c4: i2c4grp { fsl,pins = < - MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x400001c3 - MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x400001c3 + MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x40000083 + MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x40000083 >; }; @@ -313,19 +313,19 @@ pinctrl_uart1: uart1grp { fsl,pins = < - MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x140 - MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x140 - MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x140 - MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x140 + MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x0 + MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x0 + MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x0 + MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x0 >; }; pinctrl_uart2: uart2grp { fsl,pins = < - MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x140 - MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x140 - MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x140 - MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x140 + MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x0 + MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x0 + MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x0 + MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x0 >; }; @@ -337,40 +337,40 @@ pinctrl_usdhc2: usdhc2grp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x190 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x90 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d0 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d0 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d0 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d0 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d0 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x194 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x94 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d4 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d4 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d4 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d4 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d4 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x196 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x96 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d6 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d6 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d6 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d6 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d6 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts index a079322a3793..d54cddd65b52 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts @@ -277,8 +277,8 @@ pinctrl_i2c4: i2c4grp { fsl,pins = < - MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x400001c3 - MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x400001c3 + MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x40000083 + MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x40000083 >; }; @@ -290,19 +290,19 @@ pinctrl_uart1: uart1grp { fsl,pins = < - MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x140 - MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x140 - MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x140 - MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x140 + MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x0 + MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x0 + MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x0 + MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x0 >; }; pinctrl_uart2: uart2grp { fsl,pins = < - MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x140 - MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x140 - MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x140 - MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x140 + MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x0 + MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x0 + MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x0 + MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x0 >; }; @@ -314,40 +314,40 @@ pinctrl_usdhc2: usdhc2grp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x190 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x90 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d0 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d0 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d0 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d0 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d0 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x194 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x94 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d4 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d4 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d4 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d4 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d4 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x196 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x96 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d6 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d6 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d6 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d6 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d6 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi index 8d10f5b41297..d5199ecb3f6c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi @@ -205,7 +205,7 @@ reg = <0x52>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_rtc>; - interrupts-extended = <&gpio4 1 IRQ_TYPE_LEVEL_HIGH>; + interrupts-extended = <&gpio4 1 IRQ_TYPE_LEVEL_LOW>; trickle-diode-disable; }; }; @@ -247,8 +247,8 @@ pinctrl_i2c1: i2c1grp { fsl,pins = < - MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x400001c3 - MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x400001c3 + MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x40000083 + MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x40000083 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi index 0679728d2489..884ae2ad3511 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi @@ -237,8 +237,8 @@ pinctrl_i2c1: i2c1grp { fsl,pins = < - MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x400001c3 - MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x400001c3 + MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x40000083 + MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x40000083 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi index c557dbf4dcd6..2e90466db89a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi @@ -47,17 +47,6 @@ gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>; status = "okay"; }; - - reg_usb_otg1_vbus: regulator-usb-otg1 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_reg_usb1_en>; - compatible = "regulator-fixed"; - regulator-name = "usb_otg1_vbus"; - gpio = <&gpio1 10 GPIO_ACTIVE_HIGH>; - enable-active-high; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - }; }; /* off-board header */ @@ -146,9 +135,10 @@ }; &usbotg1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usbotg1>; dr_mode = "otg"; over-current-active-low; - vbus-supply = <®_usb_otg1_vbus>; status = "okay"; }; @@ -206,14 +196,6 @@ >; }; - pinctrl_reg_usb1_en: regusb1grp { - fsl,pins = < - MX8MM_IOMUXC_GPIO1_IO10_GPIO1_IO10 0x41 - MX8MM_IOMUXC_GPIO1_IO12_GPIO1_IO12 0x141 - MX8MM_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x41 - >; - }; - pinctrl_spi2: spi2grp { fsl,pins = < MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 @@ -236,4 +218,11 @@ MX8MM_IOMUXC_UART3_TXD_UART3_DCE_TX 0x140 >; }; + + pinctrl_usbotg1: usbotg1grp { + fsl,pins = < + MX8MM_IOMUXC_GPIO1_IO12_GPIO1_IO12 0x141 + MX8MM_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x41 + >; + }; }; diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi index 78ae73d0cf36..98ff17b14b2a 100644 --- a/arch/arm64/boot/dts/lg/lg1312.dtsi +++ b/arch/arm64/boot/dts/lg/lg1312.dtsi @@ -124,7 +124,6 @@ amba { #address-cells = <2>; #size-cells = <1>; - #interrupt-cells = <3>; compatible = "simple-bus"; interrupt-parent = <&gic>; diff --git a/arch/arm64/boot/dts/lg/lg1313.dtsi b/arch/arm64/boot/dts/lg/lg1313.dtsi index 2173316573be..8e9410d8f46c 100644 --- a/arch/arm64/boot/dts/lg/lg1313.dtsi +++ b/arch/arm64/boot/dts/lg/lg1313.dtsi @@ -124,7 +124,6 @@ amba { #address-cells = <2>; #size-cells = <1>; - #interrupt-cells = <3>; compatible = "simple-bus"; interrupt-parent = <&gic>; diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index df152c72276b..cd28e1c45b70 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -426,14 +426,14 @@ crypto: crypto@90000 { compatible = "inside-secure,safexcel-eip97ies"; reg = <0x90000 0x20000>; - interrupts = , - , + interrupts = , , , , - ; - interrupt-names = "mem", "ring0", "ring1", - "ring2", "ring3", "eip"; + , + ; + interrupt-names = "ring0", "ring1", "ring2", + "ring3", "eip", "mem"; clocks = <&nb_periph_clk 15>; }; diff --git a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi index a06a0a889c43..73d8803b54d8 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi @@ -133,7 +133,6 @@ odmi: odmi@300000 { compatible = "marvell,odmi-controller"; - interrupt-controller; msi-controller; marvell,odmi-frames = <4>; reg = <0x300000 0x4000>, diff --git a/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi b/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi index d6c0990a267d..218c059b16d9 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi @@ -506,14 +506,14 @@ CP11X_LABEL(crypto): crypto@800000 { compatible = "inside-secure,safexcel-eip197b"; reg = <0x800000 0x200000>; - interrupts = <87 IRQ_TYPE_LEVEL_HIGH>, - <88 IRQ_TYPE_LEVEL_HIGH>, + interrupts = <88 IRQ_TYPE_LEVEL_HIGH>, <89 IRQ_TYPE_LEVEL_HIGH>, <90 IRQ_TYPE_LEVEL_HIGH>, <91 IRQ_TYPE_LEVEL_HIGH>, - <92 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "mem", "ring0", "ring1", - "ring2", "ring3", "eip"; + <92 IRQ_TYPE_LEVEL_HIGH>, + <87 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "ring0", "ring1", "ring2", "ring3", + "eip", "mem"; clock-names = "core", "reg"; clocks = <&CP11X_LABEL(clk) 1 26>, <&CP11X_LABEL(clk) 1 17>; diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 2c35ed0734a4..b1ddc491d293 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -74,6 +74,7 @@ memory@40000000 { reg = <0 0x40000000 0 0x40000000>; + device_type = "memory"; }; reg_1p8v: regulator-1p8v { diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts index f9313b697ac1..527dcb279ba5 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts @@ -56,6 +56,7 @@ memory@40000000 { reg = <0 0x40000000 0 0x20000000>; + device_type = "memory"; }; reg_1p8v: regulator-1p8v { diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index fc338bd497f5..108931e79646 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -110,6 +110,7 @@ compatible = "mediatek,mt7986-infracfg", "syscon"; reg = <0 0x10001000 0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; wed_pcie: wed-pcie@10003000 { diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi index dccf367c7ec6..3d95625f1b0b 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi @@ -4,6 +4,8 @@ */ #include "mt8183-kukui.dtsi" +/* Must come after mt8183-kukui.dtsi to modify cros_ec */ +#include / { panel: panel { diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi index 50a0dd36b5fb..0d3c7b8162ff 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi @@ -372,6 +372,16 @@ }; }; +&cros_ec { + cbas { + compatible = "google,cros-cbas"; + }; + + keyboard-controller { + compatible = "google,cros-ec-keyb-switches"; + }; +}; + &qca_wifi { qcom,ath10k-calibration-variant = "GO_KAKADU"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi index 06f8c80bf553..e73113cb51f5 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi @@ -339,6 +339,16 @@ }; }; +&cros_ec { + cbas { + compatible = "google,cros-cbas"; + }; + + keyboard-controller { + compatible = "google,cros-ec-keyb-switches"; + }; +}; + &qca_wifi { qcom,ath10k-calibration-variant = "GO_KODAMA"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi index a7b0cb3ff7b0..181da69d18f4 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi @@ -343,6 +343,16 @@ }; }; +&cros_ec { + cbas { + compatible = "google,cros-cbas"; + }; + + keyboard-controller { + compatible = "google,cros-ec-keyb-switches"; + }; +}; + &qca_wifi { qcom,ath10k-calibration-variant = "LE_Krane"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index a428a581c93a..1db97d94658b 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -896,10 +896,6 @@ google,usb-port-id = <0>; }; - cbas { - compatible = "google,cros-cbas"; - }; - typec { compatible = "google,cros-ec-typec"; #address-cells = <1>; @@ -999,5 +995,4 @@ }; }; -#include #include diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi index 50367da93cd7..c6080af1e4a3 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi @@ -819,10 +819,6 @@ #address-cells = <1>; #size-cells = <0>; - base_detection: cbas { - compatible = "google,cros-cbas"; - }; - cros_ec_pwm: pwm { compatible = "google,cros-ec-pwm"; #pwm-cells = <1>; diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi b/arch/arm64/boot/dts/mediatek/mt8192.dtsi index 2f40c6cc407c..4ed8a0f18758 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi @@ -1539,7 +1539,7 @@ mediatek,scp = <&scp>; power-domains = <&spm MT8192_POWER_DOMAIN_VENC>; clocks = <&vencsys CLK_VENC_SET1_VENC>; - clock-names = "venc-set1"; + clock-names = "venc_sel"; assigned-clocks = <&topckgen CLK_TOP_VENC_SEL>; assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D4>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts index 3348ba69ff6c..d86d193e5a75 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts @@ -13,3 +13,7 @@ &ts_10 { status = "okay"; }; + +&watchdog { + /delete-property/ mediatek,disable-extrst; +}; diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts index 4669e9d917f8..5356f53308e2 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts @@ -33,3 +33,7 @@ &ts_10 { status = "okay"; }; + +&watchdog { + /delete-property/ mediatek,disable-extrst; +}; diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts index 5021edd02f7c..fca3606cb951 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts @@ -34,3 +34,7 @@ &ts_10 { status = "okay"; }; + +&watchdog { + /delete-property/ mediatek,disable-extrst; +}; diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts index 5117b2e7985a..998c2e78168a 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts @@ -111,6 +111,7 @@ compatible = "mediatek,mt6360"; reg = <0x34>; interrupt-controller; + #interrupt-cells = <1>; interrupts-extended = <&pio 101 IRQ_TYPE_EDGE_FALLING>; interrupt-names = "IRQB"; diff --git a/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts b/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts index f094011be9ed..8099dc04ed2e 100644 --- a/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts +++ b/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts @@ -2024,7 +2024,7 @@ status = "okay"; phy-handle = <&mgbe0_phy>; - phy-mode = "usxgmii"; + phy-mode = "10gbase-r"; mdio { #address-cells = <1>; diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dts b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts index 9d116e1fbe10..1ac4f8c24e23 100644 --- a/arch/arm64/boot/dts/qcom/apq8016-sbc.dts +++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts @@ -169,10 +169,6 @@ }; }; -&blsp_dma { - status = "okay"; -}; - &blsp_i2c2 { /* On Low speed expansion */ status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index bafac2cf7e3d..987cebbda057 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -1522,7 +1522,7 @@ clock-names = "bam_clk"; #dma-cells = <1>; qcom,ee = <0>; - status = "disabled"; + qcom,controlled-remotely; }; blsp1_uart1: serial@78af000 { diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 9de2248a385a..789121171a11 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -390,6 +390,19 @@ reg = <0x0 0x80000000 0x0 0x0>; }; + etm { + compatible = "qcom,coresight-remote-etm"; + + out-ports { + port { + modem_etm_out_funnel_in2: endpoint { + remote-endpoint = + <&funnel_in2_in_modem_etm>; + }; + }; + }; + }; + psci { compatible = "arm,psci-1.0"; method = "smc"; @@ -2565,6 +2578,14 @@ clocks = <&rpmcc RPM_QDSS_CLK>, <&rpmcc RPM_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; + in-ports { + port { + funnel_in2_in_modem_etm: endpoint { + remote-endpoint = + <&modem_etm_out_funnel_in2>; + }; + }; + }; out-ports { port { diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index b00b8164c4aa..7a41250539ff 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -1903,9 +1903,11 @@ cpu = <&CPU4>; - port{ - etm4_out: endpoint { - remote-endpoint = <&apss_funnel_in4>; + out-ports { + port{ + etm4_out: endpoint { + remote-endpoint = <&apss_funnel_in4>; + }; }; }; }; @@ -1920,9 +1922,11 @@ cpu = <&CPU5>; - port{ - etm5_out: endpoint { - remote-endpoint = <&apss_funnel_in5>; + out-ports { + port{ + etm5_out: endpoint { + remote-endpoint = <&apss_funnel_in5>; + }; }; }; }; @@ -1937,9 +1941,11 @@ cpu = <&CPU6>; - port{ - etm6_out: endpoint { - remote-endpoint = <&apss_funnel_in6>; + out-ports { + port{ + etm6_out: endpoint { + remote-endpoint = <&apss_funnel_in6>; + }; }; }; }; @@ -1954,9 +1960,11 @@ cpu = <&CPU7>; - port{ - etm7_out: endpoint { - remote-endpoint = <&apss_funnel_in7>; + out-ports { + port{ + etm7_out: endpoint { + remote-endpoint = <&apss_funnel_in7>; + }; }; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 78e537f1d796..13fe1c92bf35 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -2769,8 +2769,8 @@ interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, - <&pdc 8 IRQ_TYPE_LEVEL_HIGH>, - <&pdc 9 IRQ_TYPE_LEVEL_HIGH>; + <&pdc 8 IRQ_TYPE_EDGE_BOTH>, + <&pdc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 7fc8c2045022..b5cd24d59ad9 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -2028,8 +2028,16 @@ ranges = <0x01000000 0x0 0x00000000 0x0 0x40200000 0x0 0x100000>, <0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>; - interrupts = ; - interrupt-names = "msi"; + interrupts = , + , + , + , + , + , + , + ; + interrupt-names = "msi0", "msi1", "msi2", "msi3", + "msi4", "msi5", "msi6", "msi7"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; interrupt-map = <0 0 0 1 &intc 0 0 0 434 IRQ_TYPE_LEVEL_HIGH>, @@ -3664,9 +3672,9 @@ assigned-clock-rates = <19200000>, <200000000>; interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, - <&pdc 14 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 14 IRQ_TYPE_EDGE_BOTH>, <&pdc 15 IRQ_TYPE_EDGE_BOTH>, - <&pdc 17 IRQ_TYPE_EDGE_BOTH>; + <&pdc 17 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "hs_phy_irq", "dp_hs_phy_irq", "dm_hs_phy_irq", diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts index 135ff4368c4a..5c04c91b0ee2 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts @@ -532,7 +532,7 @@ &pcie0 { status = "okay"; perst-gpios = <&tlmm 35 GPIO_ACTIVE_LOW>; - enable-gpio = <&tlmm 134 GPIO_ACTIVE_HIGH>; + wake-gpios = <&tlmm 134 GPIO_ACTIVE_HIGH>; vddpe-3v3-supply = <&pcie0_3p3v_dual>; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 1e6841902900..95c515da9f2e 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -4048,10 +4048,10 @@ <&gcc GCC_USB30_PRIM_MASTER_CLK>; assigned-clock-rates = <19200000>, <150000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 6 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 8 IRQ_TYPE_EDGE_BOTH>, + <&pdc_intc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; @@ -4099,10 +4099,10 @@ <&gcc GCC_USB30_SEC_MASTER_CLK>; assigned-clock-rates = <19200000>, <150000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 7 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 10 IRQ_TYPE_EDGE_BOTH>, + <&pdc_intc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index c3c12b0cd416..9dccecd9fcae 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -1822,8 +1822,8 @@ phys = <&pcie0_lane>; phy-names = "pciephy"; - perst-gpio = <&tlmm 35 GPIO_ACTIVE_HIGH>; - enable-gpio = <&tlmm 37 GPIO_ACTIVE_HIGH>; + perst-gpios = <&tlmm 35 GPIO_ACTIVE_HIGH>; + wake-gpios = <&tlmm 37 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pcie0_default_state>; @@ -1925,7 +1925,7 @@ phys = <&pcie1_lane>; phy-names = "pciephy"; - perst-gpio = <&tlmm 102 GPIO_ACTIVE_HIGH>; + perst-gpios = <&tlmm 102 GPIO_ACTIVE_HIGH>; enable-gpio = <&tlmm 104 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; @@ -3628,10 +3628,10 @@ <&gcc GCC_USB30_PRIM_MASTER_CLK>; assigned-clock-rates = <19200000>, <200000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 8 IRQ_TYPE_EDGE_BOTH>, + <&pdc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; @@ -3677,10 +3677,10 @@ <&gcc GCC_USB30_SEC_MASTER_CLK>; assigned-clock-rates = <19200000>, <200000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 7 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 10 IRQ_TYPE_EDGE_BOTH>, + <&pdc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; diff --git a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi index ed9400f903c9..b677ef6705d9 100644 --- a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi @@ -656,8 +656,8 @@ avb0: ethernet@e6800000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; - reg = <0 0xe6800000 0 0x800>; + "renesas,etheravb-rcar-gen4"; + reg = <0 0xe6800000 0 0x1000>; interrupts = , , , @@ -704,8 +704,8 @@ avb1: ethernet@e6810000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; - reg = <0 0xe6810000 0 0x800>; + "renesas,etheravb-rcar-gen4"; + reg = <0 0xe6810000 0 0x1000>; interrupts = , , , @@ -752,7 +752,7 @@ avb2: ethernet@e6820000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6820000 0 0x1000>; interrupts = , , @@ -800,7 +800,7 @@ avb3: ethernet@e6830000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6830000 0 0x1000>; interrupts = , , @@ -848,7 +848,7 @@ avb4: ethernet@e6840000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6840000 0 0x1000>; interrupts = , , @@ -896,7 +896,7 @@ avb5: ethernet@e6850000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6850000 0 0x1000>; interrupts = , , @@ -1019,7 +1019,7 @@ msiof0: spi@e6e90000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6e90000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 618>; @@ -1034,7 +1034,7 @@ msiof1: spi@e6ea0000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6ea0000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 619>; @@ -1049,7 +1049,7 @@ msiof2: spi@e6c00000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6c00000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 620>; @@ -1064,7 +1064,7 @@ msiof3: spi@e6c10000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6c10000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 621>; @@ -1079,7 +1079,7 @@ msiof4: spi@e6c20000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6c20000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 622>; @@ -1094,7 +1094,7 @@ msiof5: spi@e6c28000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6c28000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 623>; diff --git a/arch/arm64/boot/dts/renesas/r8a779g0.dtsi b/arch/arm64/boot/dts/renesas/r8a779g0.dtsi index d58b18802cb0..868d1a3cbdf6 100644 --- a/arch/arm64/boot/dts/renesas/r8a779g0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a779g0.dtsi @@ -337,7 +337,7 @@ avb0: ethernet@e6800000 { compatible = "renesas,etheravb-r8a779g0", "renesas,etheravb-rcar-gen4"; - reg = <0 0xe6800000 0 0x800>; + reg = <0 0xe6800000 0 0x1000>; interrupts = , , , @@ -384,7 +384,7 @@ avb1: ethernet@e6810000 { compatible = "renesas,etheravb-r8a779g0", "renesas,etheravb-rcar-gen4"; - reg = <0 0xe6810000 0 0x800>; + reg = <0 0xe6810000 0 0x1000>; interrupts = , , , diff --git a/arch/arm64/boot/dts/renesas/r9a07g043.dtsi b/arch/arm64/boot/dts/renesas/r9a07g043.dtsi index a4738842f064..7f88395ff799 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g043.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g043.dtsi @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* - * Device Tree Source for the RZ/G2UL SoC + * Device Tree Source for the RZ/Five and RZ/G2UL SoCs * * Copyright (C) 2022 Renesas Electronics Corp. */ @@ -68,36 +68,8 @@ }; }; - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu0: cpu@0 { - compatible = "arm,cortex-a55"; - reg = <0>; - device_type = "cpu"; - #cooling-cells = <2>; - next-level-cache = <&L3_CA55>; - enable-method = "psci"; - clocks = <&cpg CPG_CORE R9A07G043_CLK_I>; - operating-points-v2 = <&cluster0_opp>; - }; - - L3_CA55: cache-controller-0 { - compatible = "cache"; - cache-unified; - cache-size = <0x40000>; - }; - }; - - psci { - compatible = "arm,psci-1.0", "arm,psci-0.2"; - method = "smc"; - }; - soc: soc { compatible = "simple-bus"; - interrupt-parent = <&gic>; #address-cells = <2>; #size-cells = <2>; ranges; @@ -545,12 +517,6 @@ sysc: system-controller@11020000 { compatible = "renesas,r9a07g043-sysc"; reg = <0 0x11020000 0 0x10000>; - interrupts = , - , - , - ; - interrupt-names = "lpm_int", "ca55stbydone_int", - "cm33stbyr_int", "ca55_deny"; status = "disabled"; }; @@ -603,16 +569,6 @@ dma-channels = <16>; }; - gic: interrupt-controller@11900000 { - compatible = "arm,gic-v3"; - #interrupt-cells = <3>; - #address-cells = <0>; - interrupt-controller; - reg = <0x0 0x11900000 0 0x40000>, - <0x0 0x11940000 0 0x60000>; - interrupts = ; - }; - sdhi0: mmc@11c00000 { compatible = "renesas,sdhi-r9a07g043", "renesas,rcar-gen3-sdhi"; @@ -893,12 +849,4 @@ }; }; }; - - timer { - compatible = "arm,armv8-timer"; - interrupts-extended = <&gic GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, - <&gic GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, - <&gic GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, - <&gic GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>; - }; }; diff --git a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi index 96f935bc2d4d..011d4c88f4ed 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi @@ -10,3 +10,139 @@ #define SOC_PERIPHERAL_IRQ(nr) GIC_SPI nr #include "r9a07g043.dtsi" + +/ { + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + compatible = "arm,cortex-a55"; + reg = <0>; + device_type = "cpu"; + #cooling-cells = <2>; + next-level-cache = <&L3_CA55>; + enable-method = "psci"; + clocks = <&cpg CPG_CORE R9A07G043_CLK_I>; + operating-points-v2 = <&cluster0_opp>; + }; + + L3_CA55: cache-controller-0 { + compatible = "cache"; + cache-unified; + cache-size = <0x40000>; + }; + }; + + psci { + compatible = "arm,psci-1.0", "arm,psci-0.2"; + method = "smc"; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts-extended = <&gic GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, + <&gic GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, + <&gic GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, + <&gic GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>; + }; +}; + +&soc { + interrupt-parent = <&gic>; + + irqc: interrupt-controller@110a0000 { + compatible = "renesas,r9a07g043u-irqc", + "renesas,rzg2l-irqc"; + reg = <0 0x110a0000 0 0x10000>; + #interrupt-cells = <2>; + #address-cells = <0>; + interrupt-controller; + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + interrupt-names = "nmi", + "irq0", "irq1", "irq2", "irq3", + "irq4", "irq5", "irq6", "irq7", + "tint0", "tint1", "tint2", "tint3", + "tint4", "tint5", "tint6", "tint7", + "tint8", "tint9", "tint10", "tint11", + "tint12", "tint13", "tint14", "tint15", + "tint16", "tint17", "tint18", "tint19", + "tint20", "tint21", "tint22", "tint23", + "tint24", "tint25", "tint26", "tint27", + "tint28", "tint29", "tint30", "tint31", + "bus-err", "ec7tie1-0", "ec7tie2-0", + "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1", + "ec7tiovf-1"; + clocks = <&cpg CPG_MOD R9A07G043_IA55_CLK>, + <&cpg CPG_MOD R9A07G043_IA55_PCLK>; + clock-names = "clk", "pclk"; + power-domains = <&cpg>; + resets = <&cpg R9A07G043_IA55_RESETN>; + }; + + gic: interrupt-controller@11900000 { + compatible = "arm,gic-v3"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x0 0x11900000 0 0x40000>, + <0x0 0x11940000 0 0x60000>; + interrupts = ; + }; +}; + +&sysc { + interrupts = , + , + , + ; + interrupt-names = "lpm_int", "ca55stbydone_int", + "cm33stbyr_int", "ca55_deny"; +}; diff --git a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi index 7dbf6a6292f4..d26488b5a82d 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi @@ -698,7 +698,27 @@ , , , - ; + , + , + , + , + , + , + , + ; + interrupt-names = "nmi", "irq0", "irq1", "irq2", "irq3", + "irq4", "irq5", "irq6", "irq7", + "tint0", "tint1", "tint2", "tint3", + "tint4", "tint5", "tint6", "tint7", + "tint8", "tint9", "tint10", "tint11", + "tint12", "tint13", "tint14", "tint15", + "tint16", "tint17", "tint18", "tint19", + "tint20", "tint21", "tint22", "tint23", + "tint24", "tint25", "tint26", "tint27", + "tint28", "tint29", "tint30", "tint31", + "bus-err", "ec7tie1-0", "ec7tie2-0", + "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1", + "ec7tiovf-1"; clocks = <&cpg CPG_MOD R9A07G044_IA55_CLK>, <&cpg CPG_MOD R9A07G044_IA55_PCLK>; clock-names = "clk", "pclk"; diff --git a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi index e000510b90a4..b3d37ca942ee 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi @@ -704,7 +704,27 @@ , , , - ; + , + , + , + , + , + , + , + ; + interrupt-names = "nmi", "irq0", "irq1", "irq2", "irq3", + "irq4", "irq5", "irq6", "irq7", + "tint0", "tint1", "tint2", "tint3", + "tint4", "tint5", "tint6", "tint7", + "tint8", "tint9", "tint10", "tint11", + "tint12", "tint13", "tint14", "tint15", + "tint16", "tint17", "tint18", "tint19", + "tint20", "tint21", "tint22", "tint23", + "tint24", "tint25", "tint26", "tint27", + "tint28", "tint29", "tint30", "tint31", + "bus-err", "ec7tie1-0", "ec7tie2-0", + "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1", + "ec7tiovf-1"; clocks = <&cpg CPG_MOD R9A07G054_IA55_CLK>, <&cpg CPG_MOD R9A07G054_IA55_PCLK>; clock-names = "clk", "pclk"; diff --git a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi index 588b14b66b6f..f37abfc13fe5 100644 --- a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi @@ -251,6 +251,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio6>; interrupts = <8 IRQ_TYPE_EDGE_FALLING>; @@ -311,6 +312,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio6>; interrupts = <4 IRQ_TYPE_EDGE_FALLING>; }; @@ -331,6 +333,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio7>; interrupts = <3 IRQ_TYPE_EDGE_FALLING>; }; @@ -341,6 +344,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio5>; interrupts = <9 IRQ_TYPE_EDGE_FALLING>; }; diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index 28cccc18f133..3d1ebadaf64a 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -1027,6 +1027,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 12>, <&dmac 13>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi0_clk &spi0_csn &spi0_miso &spi0_mosi>; #address-cells = <1>; @@ -1042,6 +1043,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 14>, <&dmac 15>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi1_clk &spi1_csn0 &spi1_csn1 &spi1_miso &spi1_mosi>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index f20c2fe95127..bada80a16e9c 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -1316,6 +1316,7 @@ compatible = "rockchip,rk3568-vpu"; reg = <0x0 0xfdea0000 0x0 0x800>; interrupts = ; + interrupt-names = "vdpu"; clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>; clock-names = "aclk", "hclk"; iommus = <&vdpu_mmu>; diff --git a/arch/arm64/boot/install.sh b/arch/arm64/boot/install.sh index 7399d706967a..9b7a09808a3d 100755 --- a/arch/arm64/boot/install.sh +++ b/arch/arm64/boot/install.sh @@ -17,7 +17,8 @@ # $3 - kernel map file # $4 - default install path (blank if root directory) -if [ "$(basename $2)" = "Image.gz" ]; then +if [ "$(basename $2)" = "Image.gz" ] || [ "$(basename $2)" = "vmlinuz.efi" ] +then # Compressed install echo "Installing compressed kernel" base=vmlinuz diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index bac4cabef607..467ac2f768ac 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -227,8 +227,19 @@ static int ctr_encrypt(struct skcipher_request *req) src += blocks * AES_BLOCK_SIZE; } if (nbytes && walk.nbytes == walk.total) { + u8 buf[AES_BLOCK_SIZE]; + u8 *d = dst; + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); + neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds, nbytes, walk.iv); + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + memcpy(d, dst, nbytes); + nbytes = 0; } kernel_neon_end(); diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 3622e9f4fb44..51738c56e96c 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -229,7 +229,7 @@ alternative_has_feature_likely(unsigned long feature) compiletime_assert(feature < ARM64_NCAPS, "feature must be < ARM64_NCAPS"); - asm_volatile_goto( + asm goto( ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops) : : [feature] "i" (feature) @@ -247,7 +247,7 @@ alternative_has_feature_unlikely(unsigned long feature) compiletime_assert(feature < ARM64_NCAPS, "feature must be < ARM64_NCAPS"); - asm_volatile_goto( + asm goto( ALTERNATIVE("nop", "b %l[l_yes]", %[feature]) : : [feature] "i" (feature) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7dce9c0aa783..af3a678a76b3 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -61,6 +61,7 @@ #define ARM_CPU_IMP_HISI 0x48 #define ARM_CPU_IMP_APPLE 0x61 #define ARM_CPU_IMP_AMPERE 0xC0 +#define ARM_CPU_IMP_MICROSOFT 0x6D #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -128,6 +129,8 @@ #define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -179,6 +182,7 @@ #define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 62c846be2d76..a75c0772ecfc 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -103,6 +103,7 @@ static inline void free_screen_info(struct screen_info *si) } #define EFI_ALLOC_ALIGN SZ_64K +#define EFI_ALLOC_LIMIT ((1UL << 48) - 1) /* * On ARM systems, virtually remapped UEFI runtime services are set up in two diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index d720b6f7e5f9..930b0e6c9462 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -36,13 +36,13 @@ * When we defined the maximum SVE vector length we defined the ABI so * that the maximum vector length included all the reserved for future * expansion bits in ZCR rather than those just currently defined by - * the architecture. While SME follows a similar pattern the fact that - * it includes a square matrix means that any allocations that attempt - * to cover the maximum potential vector length (such as happen with - * the regset used for ptrace) end up being extremely large. Define - * the much lower actual limit for use in such situations. + * the architecture. Using this length to allocate worst size buffers + * results in excessively large allocations, and this effect is even + * more pronounced for SME due to ZA. Define more suitable VLs for + * these situations. */ -#define SME_VQ_MAX 16 +#define ARCH_SVE_VQ_MAX ((ZCR_ELx_LEN_MASK >> ZCR_ELx_LEN_SHIFT) + 1) +#define SME_VQ_MAX ((SMCR_ELx_LEN_MASK >> SMCR_ELx_LEN_SHIFT) + 1) struct task_struct; @@ -343,6 +343,7 @@ extern void sme_alloc(struct task_struct *task, bool flush); extern unsigned int sme_get_vl(void); extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); +extern void sme_suspend_exit(void); /* * Return how many bytes of memory are required to store the full SME @@ -372,6 +373,7 @@ static inline int sme_max_vl(void) { return 0; } static inline int sme_max_virtualisable_vl(void) { return 0; } static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } +static inline void sme_suspend_exit(void) { } static inline size_t za_state_size(struct task_struct const *task) { diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h index 81bbfa3a035b..a1020285ea75 100644 --- a/arch/arm64/include/asm/irq_work.h +++ b/arch/arm64/include/asm/irq_work.h @@ -2,8 +2,6 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H -extern void arch_irq_work_raise(void); - static inline bool arch_irq_work_has_interrupt(void) { return true; diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index cea441b6aa5d..b5bd3c38a01b 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -18,7 +18,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: nop \n\t" " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" @@ -35,7 +35,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: b %l[l_yes] \n\t" " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 3f917124684c..74584597bfb8 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -390,6 +390,7 @@ static const struct midr_range erratum_1463225[] = { static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2139208 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2119858 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), @@ -403,6 +404,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { static const struct midr_range tsb_flush_fail_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2067961 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2054223 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), @@ -415,6 +417,7 @@ static const struct midr_range tsb_flush_fail_cpus[] = { static struct midr_range trbe_write_out_of_range_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2253138 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2224489 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), @@ -723,10 +726,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .cpu_enable = cpu_clear_bf16_from_user_emulation, }, #endif -#ifdef CONFIG_ARM64_ERRATUM_2966298 +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD { .desc = "ARM erratum 2966298", - .capability = ARM64_WORKAROUND_2966298, + .capability = ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD, /* Cortex-A520 r0p0 - r0p1 */ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1), }, diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index de16fa917e1b..62146d48dba7 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -419,7 +419,7 @@ alternative_else_nop_endif ldp x28, x29, [sp, #16 * 14] .if \el == 0 -alternative_if ARM64_WORKAROUND_2966298 +alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD tlbi vale1, xzr dsb nsh alternative_else_nop_endif diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 8c226d79abdf..59b5a16bab5d 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1347,6 +1347,20 @@ void __init sme_setup(void) get_sme_default_vl()); } +void sme_suspend_exit(void) +{ + u64 smcr = 0; + + if (!system_supports_sme()) + return; + + if (system_supports_fa64()) + smcr |= SMCR_ELx_FA64; + + write_sysreg_s(smcr, SYS_SMCR_EL1); + write_sysreg_s(0, SYS_SMPRI_EL1); +} + #endif /* CONFIG_ARM64_SME */ static void sve_init_regs(void) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 38dbd3828f13..d2850b49a371 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -46,17 +47,17 @@ static void init_irq_scs(void) for_each_possible_cpu(cpu) per_cpu(irq_shadow_call_stack_ptr, cpu) = - scs_alloc(cpu_to_node(cpu)); + scs_alloc(early_cpu_to_node(cpu)); } #ifdef CONFIG_VMAP_STACK -static void init_irq_stacks(void) +static void __init init_irq_stacks(void) { int cpu; unsigned long *p; for_each_possible_cpu(cpu) { - p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu)); + p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, early_cpu_to_node(cpu)); per_cpu(irq_stack_ptr, cpu) = p; } } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 7b0643fe2f13..214b1805e536 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -168,7 +168,11 @@ armv8pmu_events_sysfs_show(struct device *dev, PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config) static struct attribute *armv8_pmuv3_event_attrs[] = { - ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR), + /* + * Don't expose the sw_incr event in /sys. It's not usable as writes to + * PMSWINC_EL0 will trap as PMUSERENR.{SW,EN}=={0,0} and event rotation + * means we don't have a fixed event<->counter relationship regardless. + */ ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL), ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL), ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL), diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e1f6366b7ccd..d02dd2be17b3 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1450,7 +1450,8 @@ static const struct user_regset aarch64_regsets[] = { #ifdef CONFIG_ARM64_SVE [REGSET_SVE] = { /* Scalable Vector Extension */ .core_note_type = NT_ARM_SVE, - .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE), + .n = DIV_ROUND_UP(SVE_PT_SIZE(ARCH_SVE_VQ_MAX, + SVE_PT_REGS_SVE), SVE_VQ_BYTES), .size = SVE_VQ_BYTES, .align = SVE_VQ_BYTES, diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 8b02d310838f..064d996cc55b 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -77,6 +78,8 @@ void notrace __cpu_suspend_exit(void) */ spectre_v4_enable_mitigation(NULL); + sme_suspend_exit(); + /* Restore additional feature-specific configuration */ ptrauth_suspend_exit(); } diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 36c8f66cad25..d513533cc922 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -68,11 +68,9 @@ VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -Wdeclaration-after-statement \ -std=gnu11 VDSO_CFLAGS += -O2 # Some useful compiler-dependent flags from top-level Makefile -VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,) VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign) VDSO_CFLAGS += -fno-strict-overflow VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 3c344e4cd4ca..092327665a6e 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -462,6 +462,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) } irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + if (!irq) + continue; + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = pendmask & (1U << bit_nr); vgic_queue_irq_unlock(vcpu->kvm, irq, flags); @@ -1427,6 +1430,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, for (i = 0; i < irq_count; i++) { irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; update_affinity(irq, vcpu2); diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index e73830d9f136..2dc7ddee5f04 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -71,7 +71,6 @@ WORKAROUND_2064142 WORKAROUND_2077057 WORKAROUND_2457168 WORKAROUND_2658417 -WORKAROUND_2966298 WORKAROUND_AMPERE_AC03_CPU_38 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE @@ -87,3 +86,4 @@ WORKAROUND_NVIDIA_CARMEL_CNP WORKAROUND_QCOM_FALKOR_E1003 WORKAROUND_REPEAT_TLBI WORKAROUND_SPECULATIVE_AT +WORKAROUND_SPECULATIVE_UNPRIV_LOAD diff --git a/arch/csky/include/asm/irq_work.h b/arch/csky/include/asm/irq_work.h index 33aaf39d6f94..d39fcc1f5395 100644 --- a/arch/csky/include/asm/irq_work.h +++ b/arch/csky/include/asm/irq_work.h @@ -7,5 +7,5 @@ static inline bool arch_irq_work_has_interrupt(void) { return true; } -extern void arch_irq_work_raise(void); + #endif /* __ASM_CSKY_IRQ_WORK_H */ diff --git a/arch/csky/include/asm/jump_label.h b/arch/csky/include/asm/jump_label.h index 98a3f4b168bd..ef2e37a10a0f 100644 --- a/arch/csky/include/asm/jump_label.h +++ b/arch/csky/include/asm/jump_label.h @@ -12,7 +12,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: nop32 \n" " .pushsection __jump_table, \"aw\" \n" " .align 2 \n" @@ -29,7 +29,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: bsr32 %l[label] \n" " .pushsection __jump_table, \"aw\" \n" " .align 2 \n" diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 57465bff1fe4..df7f349c8d4f 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -64,6 +64,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG ELF_DETAILS + .hexagon.attributes 0 : { *(.hexagon.attributes) } DISCARDS } diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e737dc8cd660..fa3171f56327 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -9,6 +9,7 @@ config LOONGARCH select ARCH_BINFMT_ELF_STATE select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE + select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_SPECIAL @@ -80,6 +81,7 @@ config LOONGARCH select GPIOLIB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE @@ -461,23 +463,6 @@ config PHYSICAL_START specified in the "crashkernel=YM@XM" command line boot parameter passed to the panic-ed kernel). -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc//seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - endmenu config ARCH_SELECT_MEMORY_MODEL @@ -495,10 +480,6 @@ config ARCH_SPARSEMEM_ENABLE or have huge holes in the physical address space for other reasons. See for more. -config ARCH_ENABLE_THP_MIGRATION - def_bool y - depends on TRANSPARENT_HUGEPAGE - config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 402a7d9e3a53..427d147f30d7 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -72,6 +72,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t #define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l)) #define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l)) +#define __io_aw() mmiowb() + #include #define ARCH_HAS_VALID_PHYS_ADDR_RANGE diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index 302f0e33975a..c90c56094168 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -25,7 +25,12 @@ static inline void set_my_cpu_offset(unsigned long off) __my_cpu_offset = off; csr_write64(off, PERCPU_BASE_KS); } -#define __my_cpu_offset __my_cpu_offset + +#define __my_cpu_offset \ +({ \ + __asm__ __volatile__("":"+r"(__my_cpu_offset)); \ + __my_cpu_offset; \ +}) #define PERCPU_OP(op, asm_op, c_op) \ static __always_inline unsigned long __percpu_##op(void *ptr, \ diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 14508d429ffa..18a2b37f4aea 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -297,6 +297,7 @@ void play_dead(void) addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0); } while (addr == 0); + local_irq_disable(); init_fn = (void *)TO_CACHE(addr); iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); @@ -471,7 +472,7 @@ asmlinkage void start_secondary(void) unsigned int cpu; sync_counter(); - cpu = smp_processor_id(); + cpu = raw_smp_processor_id(); set_my_cpu_offset(per_cpu_offset(cpu)); cpu_probe(); diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index da3681f131c8..eeb2d815cfa2 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -271,12 +271,16 @@ void setup_tlb_handler(int cpu) set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); - } + } else { + int vec_sz __maybe_unused; + void *addr __maybe_unused; + struct page *page __maybe_unused; + + /* Avoid lockdep warning */ + rcu_cpu_starting(cpu); + #ifdef CONFIG_NUMA - else { - void *addr; - struct page *page; - const int vec_sz = sizeof(exception_handlers); + vec_sz = sizeof(exception_handlers); if (pcpu_handlers[cpu]) return; @@ -292,8 +296,8 @@ void setup_tlb_handler(int cpu) csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY); csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY); csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY); - } #endif + } } void tlb_init(int cpu) diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h index 4044eaf989ac..0921ddda11a4 100644 --- a/arch/mips/include/asm/checksum.h +++ b/arch/mips/include/asm/checksum.h @@ -241,7 +241,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, " .set pop" : "=&r" (sum), "=&r" (tmp) : "r" (saddr), "r" (daddr), - "0" (htonl(len)), "r" (htonl(proto)), "r" (sum)); + "0" (htonl(len)), "r" (htonl(proto)), "r" (sum) + : "memory"); return csum_fold(sum); } diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index c5c6864e64bc..405c85173f2c 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -36,7 +36,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" B_INSN " 2f\n\t" + asm goto("1:\t" B_INSN " 2f\n\t" "2:\t.insn\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" @@ -50,7 +50,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" J_INSN " %l[l_yes]\n\t" + asm goto("1:\t" J_INSN " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index daf3cf244ea9..b3e4dd6be7e2 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -60,6 +60,7 @@ static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { regs->cp0_epc = val; + regs->cp0_cause &= ~CAUSEF_BD; } /* Query offset/name of register from its name/offset */ diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c index 5582a4ca1e9e..7aa2c2360ff6 100644 --- a/arch/mips/kernel/elf.c +++ b/arch/mips/kernel/elf.c @@ -11,6 +11,7 @@ #include #include +#include #ifdef CONFIG_MIPS_FP_SUPPORT @@ -309,6 +310,11 @@ void mips_set_personality_nan(struct arch_elf_state *state) struct cpuinfo_mips *c = &boot_cpu_data; struct task_struct *t = current; + /* Do this early so t->thread.fpu.fcr31 won't be clobbered in case + * we are preempted before the lose_fpu(0) in start_thread. + */ + lose_fpu(0); + t->thread.fpu.fcr31 = c->fpu_csr31; switch (state->nan_2008) { case 0: diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 246c6a6b0261..5b778995d448 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2007,7 +2007,13 @@ unsigned long vi_handlers[64]; void reserve_exception_space(phys_addr_t addr, unsigned long size) { - memblock_reserve(addr, size); + /* + * reserve exception space on CPUs other than CPU0 + * is too late, since memblock is unavailable when APs + * up + */ + if (smp_processor_id() == 0) + memblock_reserve(addr, size); } void __init *set_except_vector(int n, void *addr) diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index be4829cc7a3a..28da4e720d17 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -114,10 +114,9 @@ void __init prom_init(void) prom_init_cmdline(); #if defined(CONFIG_MIPS_MT_SMP) - if (cpu_has_mipsmt) { - lantiq_smp_ops = vsmp_smp_ops; + lantiq_smp_ops = vsmp_smp_ops; + if (cpu_has_mipsmt) lantiq_smp_ops.init_secondary = lantiq_init_secondary; - register_smp_ops(&lantiq_smp_ops); - } + register_smp_ops(&lantiq_smp_ops); #endif } diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 5a8002839550..e8660d06f663 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -417,7 +417,12 @@ void __init paging_init(void) (highend_pfn - max_low_pfn) << (PAGE_SHIFT - 10)); max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn; } + + max_mapnr = highend_pfn ? highend_pfn : max_low_pfn; +#else + max_mapnr = max_low_pfn; #endif + high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); free_area_init(max_zone_pfns); } @@ -453,13 +458,6 @@ void __init mem_init(void) */ BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (_PFN_SHIFT > PAGE_SHIFT)); -#ifdef CONFIG_HIGHMEM - max_mapnr = highend_pfn ? highend_pfn : max_low_pfn; -#else - max_mapnr = max_low_pfn; -#endif - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - maar_init(); memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. */ diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 345d5e021484..abf39ecda6fb 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -24,7 +24,6 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select BUILDTIME_TABLE_SORT select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_KERNEL_BZIP2 diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 74d17d7e759d..000a28e1c5e8 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -97,26 +97,28 @@ * version takes two arguments: a src and destination register. * However, the source and destination registers can not be * the same register. + * + * We use add,l to avoid clobbering the C/B bits in the PSW. */ .macro tophys grvirt, grphys - ldil L%(__PAGE_OFFSET), \grphys - sub \grvirt, \grphys, \grphys + ldil L%(-__PAGE_OFFSET), \grphys + addl \grvirt, \grphys, \grphys .endm - + .macro tovirt grphys, grvirt ldil L%(__PAGE_OFFSET), \grvirt - add \grphys, \grvirt, \grvirt + addl \grphys, \grvirt, \grvirt .endm .macro tophys_r1 gr - ldil L%(__PAGE_OFFSET), %r1 - sub \gr, %r1, \gr + ldil L%(-__PAGE_OFFSET), %r1 + addl \gr, %r1, \gr .endm - + .macro tovirt_r1 gr ldil L%(__PAGE_OFFSET), %r1 - add \gr, %r1, \gr + addl \gr, %r1, \gr .endm .macro delay value @@ -576,6 +578,7 @@ .section __ex_table,"aw" ! \ .align 4 ! \ .word (fault_addr - .), (except_addr - .) ! \ + or %r0,%r0,%r0 ! \ .previous diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h index 3c43baca7b39..2aceebcd695c 100644 --- a/arch/parisc/include/asm/checksum.h +++ b/arch/parisc/include/asm/checksum.h @@ -40,7 +40,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) " addc %0, %5, %0\n" " addc %0, %3, %0\n" "1: ldws,ma 4(%1), %3\n" -" addib,< 0, %2, 1b\n" +" addib,> -1, %2, 1b\n" " addc %0, %3, %0\n" "\n" " extru %0, 31, 16, %4\n" @@ -126,6 +126,7 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, ** Try to keep 4 registers with "live" values ahead of the ALU. */ +" depdi 0, 31, 32, %0\n"/* clear upper half of incoming checksum */ " ldd,ma 8(%1), %4\n" /* get 1st saddr word */ " ldd,ma 8(%2), %5\n" /* get 1st daddr word */ " add %4, %0, %0\n" @@ -137,8 +138,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, " add,dc %3, %0, %0\n" /* fold in proto+len | carry bit */ " extrd,u %0, 31, 32, %4\n"/* copy upper half down */ " depdi 0, 31, 32, %0\n"/* clear upper half */ -" add %4, %0, %0\n" /* fold into 32-bits */ -" addc 0, %0, %0\n" /* add carry */ +" add,dc %4, %0, %0\n" /* fold into 32-bits, plus carry */ +" addc 0, %0, %0\n" /* add final carry */ #else @@ -163,7 +164,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, " ldw,ma 4(%2), %7\n" /* 4th daddr */ " addc %6, %0, %0\n" " addc %7, %0, %0\n" -" addc %3, %0, %0\n" /* fold in proto+len, catch carry */ +" addc %3, %0, %0\n" /* fold in proto+len */ +" addc 0, %0, %0\n" /* add carry */ #endif : "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len), diff --git a/arch/parisc/include/asm/extable.h b/arch/parisc/include/asm/extable.h new file mode 100644 index 000000000000..4ea23e3d79dc --- /dev/null +++ b/arch/parisc/include/asm/extable.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PARISC_EXTABLE_H +#define __PARISC_EXTABLE_H + +#include +#include + +/* + * The exception table consists of three addresses: + * + * - A relative address to the instruction that is allowed to fault. + * - A relative address at which the program should continue (fixup routine) + * - An asm statement which specifies which CPU register will + * receive -EFAULT when an exception happens if the lowest bit in + * the fixup address is set. + * + * Note: The register specified in the err_opcode instruction will be + * modified at runtime if a fault happens. Register %r0 will be ignored. + * + * Since relative addresses are used, 32bit values are sufficient even on + * 64bit kernel. + */ + +struct pt_regs; +int fixup_exception(struct pt_regs *regs); + +#define ARCH_HAS_RELATIVE_EXTABLE +struct exception_table_entry { + int insn; /* relative address of insn that is allowed to fault. */ + int fixup; /* relative address of fixup routine */ + int err_opcode; /* sample opcode with register which holds error code */ +}; + +#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr, opcode )\ + ".section __ex_table,\"aw\"\n" \ + ".align 4\n" \ + ".word (" #fault_addr " - .), (" #except_addr " - .)\n" \ + opcode "\n" \ + ".previous\n" + +/* + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry + * (with lowest bit set) for which the fault handler in fixup_exception() will + * load -EFAULT on fault into the register specified by the err_opcode instruction, + * and zeroes the target register in case of a read fault in get_user(). + */ +#define ASM_EXCEPTIONTABLE_VAR(__err_var) \ + int __err_var = 0 +#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr, register )\ + ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1, "or %%r0,%%r0," register) + +static inline void swap_ex_entry_fixup(struct exception_table_entry *a, + struct exception_table_entry *b, + struct exception_table_entry tmp, + int delta) +{ + a->fixup = b->fixup + delta; + b->fixup = tmp.fixup - delta; + a->err_opcode = b->err_opcode; + b->err_opcode = tmp.err_opcode; +} +#define swap_ex_entry_fixup swap_ex_entry_fixup + +#endif diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h index 94428798b6aa..317ebc5edc9f 100644 --- a/arch/parisc/include/asm/jump_label.h +++ b/arch/parisc/include/asm/jump_label.h @@ -12,7 +12,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align %1\n\t" @@ -29,7 +29,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b,n %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align %1\n\t" diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index c822bd0c0e3c..51f40eaf7780 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -8,7 +8,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ @@ -22,7 +23,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%%sr3,%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 4165079898d9..88d0ae5769dd 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -7,6 +7,7 @@ */ #include #include +#include #include #include @@ -26,37 +27,6 @@ #define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr) #endif -/* - * The exception table contains two values: the first is the relative offset to - * the address of the instruction that is allowed to fault, and the second is - * the relative offset to the address of the fixup routine. Since relative - * addresses are used, 32bit values are sufficient even on 64bit kernel. - */ - -#define ARCH_HAS_RELATIVE_EXTABLE -struct exception_table_entry { - int insn; /* relative address of insn that is allowed to fault. */ - int fixup; /* relative address of fixup routine */ -}; - -#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ - ".section __ex_table,\"aw\"\n" \ - ".align 4\n" \ - ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ - ".previous\n" - -/* - * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry - * (with lowest bit set) for which the fault handler in fixup_exception() will - * load -EFAULT into %r29 for a read or write fault, and zeroes the target - * register in case of a read fault in get_user(). - */ -#define ASM_EXCEPTIONTABLE_REG 29 -#define ASM_EXCEPTIONTABLE_VAR(__variable) \ - register long __variable __asm__ ("r29") = 0 -#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ - ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) - #define __get_user_internal(sr, val, ptr) \ ({ \ ASM_EXCEPTIONTABLE_VAR(__gu_err); \ @@ -83,7 +53,7 @@ struct exception_table_entry { \ __asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ : "=r"(__gu_val), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -115,8 +85,8 @@ struct exception_table_entry { "1: ldw 0(%%sr%2,%3),%0\n" \ "2: ldw 4(%%sr%2,%3),%R0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%1") \ : "=&r"(__gu_tmp.l), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -174,7 +144,7 @@ struct exception_table_entry { __asm__ __volatile__ ( \ "1: " stx " %1,0(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(x), "i"(sr), "r"(ptr)) @@ -186,15 +156,14 @@ struct exception_table_entry { "1: stw %1,0(%%sr%2,%3)\n" \ "2: stw %R1,4(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(__val), "i"(sr), "r"(ptr)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ - /* * Complex access routines -- external declarations */ @@ -216,7 +185,4 @@ unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src, #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER -struct pt_regs; -int fixup_exception(struct pt_regs *regs); - #endif /* __PARISC_UACCESS_H */ diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 8f12b9f318ae..a582928739dd 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -1003,6 +1003,9 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data) pr_info("\n"); + /* Prevent hung task messages when printing on serial console */ + cond_resched(); + pr_info("#define HPA_%08lx_DESCRIPTION \"%s\"\n", hpa, parisc_hardware_description(&dev->id)); diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 3e051a973e9b..92276bcf19d7 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -123,10 +123,10 @@ static unsigned long f_extend(unsigned long address) #ifdef CONFIG_64BIT if(unlikely(parisc_narrow_firmware)) { if((address & 0xff000000) == 0xf0000000) - return 0xf0f0f0f000000000UL | (u32)address; + return (0xfffffff0UL << 32) | (u32)address; if((address & 0xf0000000) == 0xf0000000) - return 0xffffffff00000000UL | (u32)address; + return (0xffffffffUL << 32) | (u32)address; } #endif return address; diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 4d392e4ed358..ac7253891d5e 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -78,7 +78,7 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, #endif } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_FUNCTION_GRAPH_TRACER) int ftrace_enable_ftrace_graph_caller(void) { static_key_enable(&ftrace_graph_enable.key); diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 1f6c776d8081..c67883487ecd 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -171,7 +171,6 @@ static int __init processor_probe(struct parisc_device *dev) p->cpu_num = cpu_info.cpu_num; p->cpu_loc = cpu_info.cpu_loc; - set_cpu_possible(cpuid, true); store_cpu_topology(cpuid); #ifdef CONFIG_SMP @@ -466,13 +465,6 @@ static struct parisc_driver cpu_driver __refdata = { */ void __init processor_init(void) { - unsigned int cpu; - reset_cpu_topology(); - - /* reset possible mask. We will mark those which are possible. */ - for_each_possible_cpu(cpu) - set_cpu_possible(cpu, false); - register_parisc_driver(&cpu_driver); } diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index e8a4d77cff53..782ee05e2088 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -118,8 +118,8 @@ static int emulate_ldh(struct pt_regs *regs, int toreg) "2: ldbs 1(%%sr1,%3), %0\n" " depw %2, 23, 24, %0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1) : "r" (saddr), "r" (regs->isr) ); @@ -150,8 +150,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) " mtctl %2,11\n" " vshd %0,%3,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2) : "r" (saddr), "r" (regs->isr) ); @@ -167,6 +167,7 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) { unsigned long saddr = regs->ior; + unsigned long shift, temp1; __u64 val = 0; ASM_EXCEPTIONTABLE_VAR(ret); @@ -178,25 +179,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) #ifdef CONFIG_64BIT __asm__ __volatile__ ( -" depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */ -" mtsp %4, %%sr1\n" -" depd %%r0,63,3,%3\n" -"1: ldd 0(%%sr1,%3),%0\n" -"2: ldd 8(%%sr1,%3),%%r20\n" -" subi 64,%%r19,%%r19\n" -" mtsar %%r19\n" -" shrpd %0,%%r20,%%sar,%0\n" +" depd,z %2,60,3,%3\n" /* shift=(ofs&7)*8 */ +" mtsp %5, %%sr1\n" +" depd %%r0,63,3,%2\n" +"1: ldd 0(%%sr1,%2),%0\n" +"2: ldd 8(%%sr1,%2),%4\n" +" subi 64,%3,%3\n" +" mtsar %3\n" +" shrpd %0,%4,%%sar,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) - : "=r" (val), "+r" (ret) - : "0" (val), "r" (saddr), "r" (regs->isr) - : "r19", "r20" ); + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") + : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) + : "r" (regs->isr) ); #else - { - unsigned long shift, temp1; __asm__ __volatile__ ( -" zdep %2,29,2,%3\n" /* r19=(ofs&3)*8 */ +" zdep %2,29,2,%3\n" /* shift=(ofs&3)*8 */ " mtsp %5, %%sr1\n" " dep %%r0,31,2,%2\n" "1: ldw 0(%%sr1,%2),%0\n" @@ -207,12 +205,11 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " vshd %0,%R0,%0\n" " vshd %R0,%4,%R0\n" "4: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1") : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) : "r" (regs->isr) ); - } #endif DPRINTF("val = 0x%llx\n", val); @@ -242,8 +239,8 @@ static int emulate_sth(struct pt_regs *regs, int frreg) "1: stb %1, 0(%%sr1, %3)\n" "2: stb %2, 1(%%sr1, %3)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret), "=&r" (temp1) : "r" (val), "r" (regs->ior), "r" (regs->isr) ); @@ -283,8 +280,8 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) " stw %%r20,0(%%sr1,%2)\n" " stw %%r21,4(%%sr1,%2)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -327,10 +324,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "3: std %%r20,0(%%sr1,%2)\n" "4: std %%r21,8(%%sr1,%2)\n" "5: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -356,11 +353,11 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "4: stw %%r1,4(%%sr1,%3)\n" "5: stw %2,8(%%sr1,%3)\n" "6: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b, "%0") : "+r" (ret) : "r" (valh), "r" (vall), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r1" ); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index b00aa98b582c..fbd9ada5e527 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -150,11 +150,16 @@ int fixup_exception(struct pt_regs *regs) * Fix up get_user() and put_user(). * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant * bit in the relative address of the fixup routine to indicate - * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with - * -EFAULT to report a userspace access error. + * that the register encoded in the "or %r0,%r0,register" + * opcode should be loaded with -EFAULT to report a userspace + * access error. */ if (fix->fixup & 1) { - regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT; + int fault_error_reg = fix->err_opcode & 0x1f; + if (!WARN_ON(!fault_error_reg)) + regs->gr[fault_error_reg] = -EFAULT; + pr_debug("Unalignment fixup of register %d at %pS\n", + fault_error_reg, (void*)regs->iaoq[0]); /* zero target register for get_user() */ if (parisc_acctyp(0, regs->iir) == VM_READ) { diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2c94f9cf1ce0..6050e6e10d32 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -806,7 +806,6 @@ config THREAD_SHIFT int "Thread shift" if EXPERT range 13 15 default "15" if PPC_256K_PAGES - default "15" if PPC_PSERIES || PPC_POWERNV default "14" if PPC64 default "13" help diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 61a4736355c2..20d5052e2292 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -74,7 +74,7 @@ ##__VA_ARGS__) #define WARN_ENTRY(insn, flags, label, ...) \ - asm_volatile_goto( \ + asm goto( \ "1: " insn "\n" \ EX_TABLE(1b, %l[label]) \ _EMIT_BUG_ENTRY \ diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h index b8b0be8f1a07..c6d3078bd8c3 100644 --- a/arch/powerpc/include/asm/irq_work.h +++ b/arch/powerpc/include/asm/irq_work.h @@ -6,6 +6,5 @@ static inline bool arch_irq_work_has_interrupt(void) { return true; } -extern void arch_irq_work_raise(void); #endif /* _ASM_POWERPC_IRQ_WORK_H */ diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 93ce3ec25387..2f2a86ed2280 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -17,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop # arch_static_branch\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".long 1b - ., %l[l_yes] - .\n\t" @@ -32,7 +32,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b %l[l_yes] # arch_static_branch_jump\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".long 1b - ., %l[l_yes] - .\n\t" diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 94b981152667..07fabb054aea 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -417,5 +417,9 @@ extern void *abatron_pteptrs[2]; #include #endif +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) +#define __HAVE_ARCH_RESERVED_KERNEL_PAGES +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MMU_H_ */ diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h index 4c6c6dbd182f..da827d2d0866 100644 --- a/arch/powerpc/include/asm/mmzone.h +++ b/arch/powerpc/include/asm/mmzone.h @@ -42,14 +42,6 @@ u64 memory_hotplug_max(void); #else #define memory_hotplug_max() memblock_end_of_DRAM() #endif /* CONFIG_NUMA */ -#ifdef CONFIG_FA_DUMP -#define __HAVE_ARCH_RESERVED_KERNEL_PAGES -#endif - -#ifdef CONFIG_MEMORY_HOTPLUG -extern int create_section_mapping(unsigned long start, unsigned long end, - int nid, pgprot_t prot); -#endif #endif /* __KERNEL__ */ #endif /* _ASM_MMZONE_H_ */ diff --git a/arch/powerpc/include/asm/reg_fsl_emb.h b/arch/powerpc/include/asm/reg_fsl_emb.h index a21f529c43d9..8359c06d92d9 100644 --- a/arch/powerpc/include/asm/reg_fsl_emb.h +++ b/arch/powerpc/include/asm/reg_fsl_emb.h @@ -12,9 +12,16 @@ #ifndef __ASSEMBLY__ /* Performance Monitor Registers */ #define mfpmr(rn) ({unsigned int rval; \ - asm volatile("mfpmr %0," __stringify(rn) \ + asm volatile(".machine push; " \ + ".machine e300; " \ + "mfpmr %0," __stringify(rn) ";" \ + ".machine pop; " \ : "=r" (rval)); rval;}) -#define mtpmr(rn, v) asm volatile("mtpmr " __stringify(rn) ",%0" : : "r" (v)) +#define mtpmr(rn, v) asm volatile(".machine push; " \ + ".machine e300; " \ + "mtpmr " __stringify(rn) ",%0; " \ + ".machine pop; " \ + : : "r" (v)) #endif /* __ASSEMBLY__ */ /* Freescale Book E Performance Monitor APU Registers */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index af58f1ed3952..c4b798aa6ce8 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -14,7 +14,7 @@ #ifdef __KERNEL__ -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN) && CONFIG_THREAD_SHIFT < 15 #define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1) #else #define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 3ddc65c63a49..45d4c9cf3f3a 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -72,7 +72,7 @@ __pu_failed: \ * are no aliasing issues. */ #define __put_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm goto( \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ EX_TABLE(1b, %l2) \ : \ @@ -85,7 +85,7 @@ __pu_failed: \ __put_user_asm_goto(x, ptr, label, "std") #else /* __powerpc64__ */ #define __put_user_asm2_goto(x, addr, label) \ - asm_volatile_goto( \ + asm goto( \ "1: stw%X1 %0, %1\n" \ "2: stw%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ @@ -132,7 +132,7 @@ do { \ #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define __get_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: "op"%U1%X1 %0, %1 # get_user\n" \ EX_TABLE(1b, %l2) \ : "=r" (x) \ @@ -145,7 +145,7 @@ do { \ __get_user_asm_goto(x, addr, label, "ld") #else /* __powerpc64__ */ #define __get_user_asm2_goto(x, addr, label) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: lwz%X1 %0, %1\n" \ "2: lwz%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h index 4c69ece52a31..59ed89890c90 100644 --- a/arch/powerpc/include/asm/vmalloc.h +++ b/arch/powerpc/include/asm/vmalloc.h @@ -7,14 +7,14 @@ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP #define arch_vmap_pud_supported arch_vmap_pud_supported -static inline bool arch_vmap_pud_supported(pgprot_t prot) +static __always_inline bool arch_vmap_pud_supported(pgprot_t prot) { /* HPT does not cope with large pages in the vmalloc area */ return radix_enabled(); } #define arch_vmap_pmd_supported arch_vmap_pmd_supported -static inline bool arch_vmap_pmd_supported(pgprot_t prot) +static __always_inline bool arch_vmap_pmd_supported(pgprot_t prot) { return radix_enabled(); } diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h index ceb06b109f83..2ae8e9a7b461 100644 --- a/arch/powerpc/kernel/cpu_specs_e500mc.h +++ b/arch/powerpc/kernel/cpu_specs_e500mc.h @@ -8,7 +8,8 @@ #ifdef CONFIG_PPC64 #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ - PPC_FEATURE_HAS_FPU | PPC_FEATURE_64) + PPC_FEATURE_HAS_FPU | PPC_FEATURE_64 | \ + PPC_FEATURE_BOOKE) #else #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ PPC_FEATURE_BOOKE) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index a019ed6fc839..26c151e2a794 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -52,7 +52,8 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) - std r11,_NIP(r1) + std r11,_LINK(r1) + std r11,_NIP(r1) /* Saved LR is also the next instruction */ std r12,_MSR(r1) std r0,GPR0(r1) std r10,GPR1(r1) @@ -70,7 +71,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r9,GPR13(r1) SAVE_NVGPRS(r1) std r11,_XER(r1) - std r11,_LINK(r1) std r11,_CTR(r1) li r11,\trapnr diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index 9dc0ad3c533a..5a6e44e4d36f 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -230,7 +230,7 @@ again: * This allows interrupts to be unmasked without hard disabling, and * also without new hard interrupts coming in ahead of pending ones. */ - asm_volatile_goto( + asm goto( "1: \n" " lbz 9,%0(13) \n" " cmpwi 9,0 \n" diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8537c354c560..9531ab90feb8 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -369,6 +369,18 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (IS_ENABLED(CONFIG_PPC64)) boot_cpu_hwid = be32_to_cpu(intserv[found_thread]); + if (nr_cpu_ids % nthreads != 0) { + set_nr_cpu_ids(ALIGN(nr_cpu_ids, nthreads)); + pr_warn("nr_cpu_ids was not a multiple of threads_per_core, adjusted to %d\n", + nr_cpu_ids); + } + + if (boot_cpuid >= nr_cpu_ids) { + set_nr_cpu_ids(min(CONFIG_NR_CPUS, ALIGN(boot_cpuid + 1, nthreads))); + pr_warn("Boot CPU %d >= nr_cpu_ids, adjusted nr_cpu_ids to %d\n", + boot_cpuid, nr_cpu_ids); + } + /* * PAPR defines "logical" PVR values for cpus that * meet various levels of the architecture: diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3956f32682c6..362b712386f6 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1439,10 +1439,12 @@ static int emulate_instruction(struct pt_regs *regs) return -EINVAL; } +#ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long addr) { return is_kernel_addr(addr); } +#endif #ifdef CONFIG_MATH_EMULATION static int emulate_math(struct pt_regs *regs) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 9b394bab17eb..374b82cf13d9 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -72,7 +72,7 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o -CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec) +CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec) # Enable CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 398b5694aeb7..ec30af8eadb7 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -586,6 +586,8 @@ static int do_fp_load(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -636,6 +638,8 @@ static int do_fp_store(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -680,6 +684,9 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u = {}; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ @@ -707,6 +714,9 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index f6151a589298..87aa76c73799 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -463,6 +463,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, set_pte_at(vma->vm_mm, addr, ptep, pte); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * For hash translation mode, we use the deposited table to store hash slot * information and they are stored at PTRS_PER_PMD offset from related pmd @@ -484,6 +485,7 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, return true; } +#endif /* * Does the CPU support tlbie? diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 119ef491f797..d3a7726ecf51 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -126,7 +126,7 @@ void pgtable_cache_add(unsigned int shift) * as to leave enough 0 bits in the address to contain it. */ unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, HUGEPD_SHIFT_MASK + 1); - struct kmem_cache *new; + struct kmem_cache *new = NULL; /* It would be nice if this was a BUILD_BUG_ON(), but at the * moment, gcc doesn't seem to recognize is_power_of_2 as a @@ -139,7 +139,8 @@ void pgtable_cache_add(unsigned int shift) align = max_t(unsigned long, align, minalign); name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); - new = kmem_cache_create(name, table_size, align, 0, ctor(shift)); + if (name) + new = kmem_cache_create(name, table_size, align, 0, ctor(shift)); if (!new) panic("Could not allocate pgtable cache for order %d", shift); diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c index a70828a6d935..aa9aa11927b2 100644 --- a/arch/powerpc/mm/kasan/init_32.c +++ b/arch/powerpc/mm/kasan/init_32.c @@ -64,6 +64,7 @@ int __init __weak kasan_init_region(void *start, size_t size) if (ret) return ret; + k_start = k_start & PAGE_MASK; block = memblock_alloc(k_end - k_start, PAGE_SIZE); if (!block) return -ENOMEM; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index bd9784f77f2e..71250605b784 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -179,3 +179,8 @@ static inline bool debug_pagealloc_enabled_or_kfence(void) { return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled(); } + +#ifdef CONFIG_MEMORY_HOTPLUG +int create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); +#endif diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 7ff8ff3509f5..943248a0e9a9 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -164,6 +164,20 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index, ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + + /* + * ret value as 'H_PARAMETER' with detail_rc as 'GEN_BUF_TOO_SMALL', + * specifies that the current buffer size cannot accommodate + * all the information and a partial buffer returned. + * Since in this function we are only accessing data for a given starting index, + * we don't need to accommodate whole data and can get required count by + * accessing first entry data. + * Hence hcall fails only incase the ret value is other than H_SUCCESS or + * H_PARAMETER with detail_rc value as GEN_BUF_TOO_SMALL(0x1B). + */ + if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B) + ret = 0; + if (ret) { pr_devel("hcall failed: 0x%lx\n", ret); goto out; @@ -228,6 +242,7 @@ static int h_gpci_event_init(struct perf_event *event) { u64 count; u8 length; + unsigned long ret; /* Not our event */ if (event->attr.type != event->pmu->type) @@ -258,13 +273,23 @@ static int h_gpci_event_init(struct perf_event *event) } /* check if the request works... */ - if (single_gpci_request(event_get_request(event), + ret = single_gpci_request(event_get_request(event), event_get_starting_index(event), event_get_secondary_index(event), event_get_counter_info_version(event), event_get_offset(event), length, - &count)) { + &count); + + /* + * ret value as H_AUTHORITY implies that partition is not permitted to retrieve + * performance information, and required to set + * "Enable Performance Information Collection" option. + */ + if (ret == H_AUTHORITY) + return -EPERM; + + if (ret) { pr_devel("gpci hcall failed\n"); return -EINVAL; } diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index 1830e1ac1f8f..107a8b60ad0c 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -99,9 +99,6 @@ static void __init linkstation_init_IRQ(void) mpic_init(mpic); } -extern void avr_uart_configure(void); -extern void avr_uart_send(const char); - static void __noreturn linkstation_restart(char *cmd) { local_irq_disable(); diff --git a/arch/powerpc/platforms/embedded6xx/mpc10x.h b/arch/powerpc/platforms/embedded6xx/mpc10x.h index 5ad12023e562..ebc258fa4858 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc10x.h +++ b/arch/powerpc/platforms/embedded6xx/mpc10x.h @@ -156,4 +156,7 @@ int mpc10x_disable_store_gathering(struct pci_controller *hose); /* For MPC107 boards that use the built-in openpic */ void mpc10x_set_openpic(void); +void avr_uart_configure(void); +void avr_uart_send(const char c); + #endif /* __PPC_KERNEL_MPC10X_H */ diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 97b026130c71..1e5f083cdb72 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -569,29 +569,6 @@ static void iommu_table_setparms(struct pci_controller *phb, struct iommu_table_ops iommu_table_lpar_multi_ops; -/* - * iommu_table_setparms_lpar - * - * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. - */ -static void iommu_table_setparms_lpar(struct pci_controller *phb, - struct device_node *dn, - struct iommu_table *tbl, - struct iommu_table_group *table_group, - const __be32 *dma_window) -{ - unsigned long offset, size, liobn; - - of_parse_dma_window(dn, dma_window, &liobn, &offset, &size); - - iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL, - &iommu_table_lpar_multi_ops); - - - table_group->tce32_start = offset; - table_group->tce32_size = size; -} - struct iommu_table_ops iommu_table_pseries_ops = { .set = tce_build_pSeries, .clear = tce_free_pSeries, @@ -719,26 +696,71 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = { * dynamic 64bit DMA window, walking up the device tree. */ static struct device_node *pci_dma_find(struct device_node *dn, - const __be32 **dma_window) + struct dynamic_dma_window_prop *prop) { - const __be32 *dw = NULL; + const __be32 *default_prop = NULL; + const __be32 *ddw_prop = NULL; + struct device_node *rdn = NULL; + bool default_win = false, ddw_win = false; for ( ; dn && PCI_DN(dn); dn = dn->parent) { - dw = of_get_property(dn, "ibm,dma-window", NULL); - if (dw) { - if (dma_window) - *dma_window = dw; - return dn; + default_prop = of_get_property(dn, "ibm,dma-window", NULL); + if (default_prop) { + rdn = dn; + default_win = true; } - dw = of_get_property(dn, DIRECT64_PROPNAME, NULL); - if (dw) - return dn; - dw = of_get_property(dn, DMA64_PROPNAME, NULL); - if (dw) - return dn; + ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL); + if (ddw_prop) { + rdn = dn; + ddw_win = true; + break; + } + ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL); + if (ddw_prop) { + rdn = dn; + ddw_win = true; + break; + } + + /* At least found default window, which is the case for normal boot */ + if (default_win) + break; } - return NULL; + /* For PCI devices there will always be a DMA window, either on the device + * or parent bus + */ + WARN_ON(!(default_win | ddw_win)); + + /* caller doesn't want to get DMA window property */ + if (!prop) + return rdn; + + /* parse DMA window property. During normal system boot, only default + * DMA window is passed in OF. But, for kdump, a dedicated adapter might + * have both default and DDW in FDT. In this scenario, DDW takes precedence + * over default window. + */ + if (ddw_win) { + struct dynamic_dma_window_prop *p; + + p = (struct dynamic_dma_window_prop *)ddw_prop; + prop->liobn = p->liobn; + prop->dma_base = p->dma_base; + prop->tce_shift = p->tce_shift; + prop->window_shift = p->window_shift; + } else if (default_win) { + unsigned long offset, size, liobn; + + of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size); + + prop->liobn = cpu_to_be32((u32)liobn); + prop->dma_base = cpu_to_be64(offset); + prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K); + prop->window_shift = cpu_to_be32(order_base_2(size)); + } + + return rdn; } static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) @@ -746,17 +768,20 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) struct iommu_table *tbl; struct device_node *dn, *pdn; struct pci_dn *ppci; - const __be32 *dma_window = NULL; + struct dynamic_dma_window_prop prop; dn = pci_bus_to_OF_node(bus); pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", dn); - pdn = pci_dma_find(dn, &dma_window); + pdn = pci_dma_find(dn, &prop); - if (dma_window == NULL) - pr_debug(" no ibm,dma-window property !\n"); + /* In PPC architecture, there will always be DMA window on bus or one of the + * parent bus. During reboot, there will be ibm,dma-window property to + * define DMA window. For kdump, there will at least be default window or DDW + * or both. + */ ppci = PCI_DN(pdn); @@ -766,13 +791,24 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) if (!ppci->table_group) { ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); tbl = ppci->table_group->tables[0]; - if (dma_window) { - iommu_table_setparms_lpar(ppci->phb, pdn, tbl, - ppci->table_group, dma_window); - if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) - panic("Failed to initialize iommu table"); - } + iommu_table_setparms_common(tbl, ppci->phb->bus->number, + be32_to_cpu(prop.liobn), + be64_to_cpu(prop.dma_base), + 1ULL << be32_to_cpu(prop.window_shift), + be32_to_cpu(prop.tce_shift), NULL, + &iommu_table_lpar_multi_ops); + + /* Only for normal boot with default window. Doesn't matter even + * if we set these with DDW which is 64bit during kdump, since + * these will not be used during kdump. + */ + ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base); + ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); + + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + iommu_register_group(ppci->table_group, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->table_group); @@ -960,6 +996,12 @@ static void find_existing_ddw_windows_named(const char *name) continue; } + /* If at the time of system initialization, there are DDWs in OF, + * it means this is during kexec. DDW could be direct or dynamic. + * We will just mark DDWs as "dynamic" since this is kdump path, + * no need to worry about perforance. ddw_list_new_entry() will + * set window->direct = false. + */ window = ddw_list_new_entry(pdn, dma64); if (!window) { of_node_put(pdn); @@ -1525,8 +1567,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) { struct device_node *pdn, *dn; struct iommu_table *tbl; - const __be32 *dma_window = NULL; struct pci_dn *pci; + struct dynamic_dma_window_prop prop; pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); @@ -1539,7 +1581,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); pr_debug(" node is %pOF\n", dn); - pdn = pci_dma_find(dn, &dma_window); + pdn = pci_dma_find(dn, &prop); if (!pdn || !PCI_DN(pdn)) { printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " "no DMA window found for pci dev=%s dn=%pOF\n", @@ -1552,8 +1594,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) if (!pci->table_group) { pci->table_group = iommu_pseries_alloc_group(pci->phb->node); tbl = pci->table_group->tables[0]; - iommu_table_setparms_lpar(pci->phb, pdn, tbl, - pci->table_group, dma_window); + + iommu_table_setparms_common(tbl, pci->phb->bus->number, + be32_to_cpu(prop.liobn), + be64_to_cpu(prop.dma_base), + 1ULL << be32_to_cpu(prop.window_shift), + be32_to_cpu(prop.tce_shift), NULL, + &iommu_table_lpar_multi_ops); + + /* Only for normal boot with default window. Doesn't matter even + * if we set these with DDW which is 64bit during kdump, since + * these will not be used during kdump. + */ + pci->table_group->tce32_start = be64_to_cpu(prop.dma_base); + pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); iommu_init_table(tbl, pci->phb->node, 0, 0); iommu_register_group(pci->table_group, diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 541199c6a587..5186d65d772e 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -660,8 +660,12 @@ u64 pseries_paravirt_steal_clock(int cpu) { struct lppaca *lppaca = &lppaca_of(cpu); - return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + - be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb)); + /* + * VPA steal time counters are reported at TB frequency. Hence do a + * conversion to ns before returning + */ + return tb_to_ns(be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + + be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb))); } #endif diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c index 526c621b098b..eea2041b270b 100644 --- a/arch/powerpc/platforms/pseries/papr_platform_attributes.c +++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c @@ -101,10 +101,12 @@ retry: esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs); temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL); - if (temp_buf) + if (temp_buf) { buf = temp_buf; - else - return -ENOMEM; + } else { + ret = -ENOMEM; + goto out_buf; + } goto retry; } diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 07387f9c135c..72b87b08ab44 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -123,6 +123,7 @@ interrupt-parent = <&gpio>; interrupts = <1 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; onkey { compatible = "dlg,da9063-onkey"; diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index d47d87c2d7e3..dcf1bc9de584 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -25,6 +25,11 @@ #define ARCH_SUPPORTS_FTRACE_OPS 1 #ifndef __ASSEMBLY__ + +extern void *return_address(unsigned int level); + +#define ftrace_return_address(n) return_address(n) + void MCOUNT_NAME(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/riscv/include/asm/irq_work.h b/arch/riscv/include/asm/irq_work.h index b53891964ae0..b27a4d64fc6a 100644 --- a/arch/riscv/include/asm/irq_work.h +++ b/arch/riscv/include/asm/irq_work.h @@ -6,5 +6,5 @@ static inline bool arch_irq_work_has_interrupt(void) { return IS_ENABLED(CONFIG_SMP); } -extern void arch_irq_work_raise(void); + #endif /* _ASM_RISCV_IRQ_WORK_H */ diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h index 14a5ea8d8ef0..4a35d787c019 100644 --- a/arch/riscv/include/asm/jump_label.h +++ b/arch/riscv/include/asm/jump_label.h @@ -17,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( " .align 2 \n\t" " .option push \n\t" " .option norelax \n\t" @@ -39,7 +39,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( " .align 2 \n\t" " .option push \n\t" " .option norelax \n\t" diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 59bb53da473d..63055c6ad2c2 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -79,7 +79,7 @@ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. */ -#define vmemmap ((struct page *)VMEMMAP_START) +#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)) #define PCI_IO_SIZE SZ_16M #define PCI_IO_END VMEMMAP_START diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index ab333cb792fd..4c0805d264ca 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -7,6 +7,7 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) endif CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) @@ -41,6 +42,7 @@ obj-y += irq.o obj-y += process.o obj-y += ptrace.o obj-y += reset.o +obj-y += return_address.o obj-y += setup.o obj-y += signal.o obj-y += syscall_table.o diff --git a/arch/riscv/kernel/return_address.c b/arch/riscv/kernel/return_address.c new file mode 100644 index 000000000000..c8115ec8fb30 --- /dev/null +++ b/arch/riscv/kernel/return_address.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This code come from arch/arm64/kernel/return_address.c + * + * Copyright (C) 2023 SiFive. + */ + +#include +#include +#include + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static bool save_return_addr(void *d, unsigned long pc) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)pc; + return false; + } + + --data->level; + + return true; +} +NOKPROBE_SYMBOL(save_return_addr); + +noinline void *return_address(unsigned int level) +{ + struct return_address_data data; + + data.level = level + 3; + data.addr = NULL; + + arch_stack_walk(save_return_addr, &data, current, NULL); + + if (!data.level) + return data.addr; + else + return NULL; + +} +EXPORT_SYMBOL_GPL(return_address); +NOKPROBE_SYMBOL(return_address); diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 526c3f40f6a2..7b66e81e5f87 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -601,7 +601,9 @@ static int ctr_aes_crypt(struct skcipher_request *req) * final block may be < AES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { - cpacf_kmctr(sctx->fc, sctx->key, buf, walk.src.virt.addr, + memset(buf, 0, AES_BLOCK_SIZE); + memcpy(buf, walk.src.virt.addr, nbytes); + cpacf_kmctr(sctx->fc, sctx->key, buf, buf, AES_BLOCK_SIZE, walk.iv); memcpy(walk.dst.virt.addr, buf, nbytes); crypto_inc(walk.iv, AES_BLOCK_SIZE); diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 621322eb0e68..d84d87349718 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -688,9 +688,11 @@ static int ctr_paes_crypt(struct skcipher_request *req) * final block may be < AES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { + memset(buf, 0, AES_BLOCK_SIZE); + memcpy(buf, walk.src.virt.addr, nbytes); while (1) { if (cpacf_kmctr(ctx->fc, ¶m, buf, - walk.src.virt.addr, AES_BLOCK_SIZE, + buf, AES_BLOCK_SIZE, walk.iv) == AES_BLOCK_SIZE) break; if (__paes_convert_key(ctx)) diff --git a/arch/s390/include/asm/irq_work.h b/arch/s390/include/asm/irq_work.h index 603783766d0a..f00c9f610d5a 100644 --- a/arch/s390/include/asm/irq_work.h +++ b/arch/s390/include/asm/irq_work.h @@ -7,6 +7,4 @@ static inline bool arch_irq_work_has_interrupt(void) return true; } -void arch_irq_work_raise(void); - #endif /* _ASM_S390_IRQ_WORK_H */ diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 895f774bbcc5..bf78cf381dfc 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -25,7 +25,7 @@ */ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("0: brcl 0,%l[label]\n" + asm goto("0: brcl 0,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" @@ -39,7 +39,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("0: brcl 15,%l[label]\n" + asm goto("0: brcl 15,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b1e98a9ed152..09abf000359f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -777,6 +777,13 @@ struct kvm_vm_stat { u64 inject_service_signal; u64 inject_virtio; u64 aen_forward; + u64 gmap_shadow_create; + u64 gmap_shadow_reuse; + u64 gmap_shadow_r1_entry; + u64 gmap_shadow_r2_entry; + u64 gmap_shadow_r3_entry; + u64 gmap_shadow_sg_entry; + u64 gmap_shadow_pg_entry; }; struct kvm_arch_memory_slot { diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h index 93d1ccd3304c..9c49c3d67cd5 100644 --- a/arch/s390/include/uapi/asm/dasd.h +++ b/arch/s390/include/uapi/asm/dasd.h @@ -78,6 +78,7 @@ typedef struct dasd_information2_t { * 0x040: give access to raw eckd data * 0x080: enable discard support * 0x100: enable autodisable for IFCC errors (default) + * 0x200: enable requeue of all requests on autoquiesce */ #define DASD_FEATURE_READONLY 0x001 #define DASD_FEATURE_USEDIAG 0x002 @@ -88,6 +89,7 @@ typedef struct dasd_information2_t { #define DASD_FEATURE_USERAW 0x040 #define DASD_FEATURE_DISCARD 0x080 #define DASD_FEATURE_PATH_AUTODISABLE 0x100 +#define DASD_FEATURE_REQUEUEQUIESCE 0x200 #define DASD_FEATURE_DEFAULT DASD_FEATURE_PATH_AUTODISABLE #define DASD_PARTN_BITS 2 diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 7ee3651d00ab..732024ca005a 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -166,5 +166,6 @@ int populate_cache_leaves(unsigned int cpu) ci_leaf_init(this_leaf++, pvt, ctype, level, cpu); } } + this_cpu_ci->cpu_map_populated = true; return 0; } diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 6826e2a69a21..f61a652046cf 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -647,7 +647,7 @@ static int __init attr_event_init(void) for (i = 0; i < ARRAY_SIZE(paicrypt_ctrnames); i++) { ret = attr_event_init_one(attrs, i); if (ret) { - attr_event_free(attrs, i - 1); + attr_event_free(attrs, i); return ret; } } diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 74b53c531e0c..b4d89654183a 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -612,7 +612,7 @@ static int __init attr_event_init(void) for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) { ret = attr_event_init_one(attrs, i); if (ret) { - attr_event_free(attrs, i - 1); + attr_event_free(attrs, i); return ret; } } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 092b16b4dd4f..6b442edb3857 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -385,6 +385,7 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) /* * floating point control reg. is in the thread structure */ + save_fpu_regs(); if ((unsigned int) data != 0 || test_fp_ctl(data >> (BITS_PER_LONG - 32))) return -EINVAL; @@ -741,6 +742,7 @@ static int __poke_user_compat(struct task_struct *child, /* * floating point control reg. is in the thread structure */ + save_fpu_regs(); if (test_fp_ctl(tmp)) return -EINVAL; child->thread.fpu.fpc = data; @@ -904,9 +906,7 @@ static int s390_fpregs_set(struct task_struct *target, int rc = 0; freg_t fprs[__NUM_FPRS]; - if (target == current) - save_fpu_regs(); - + save_fpu_regs(); if (MACHINE_HAS_VX) convert_vx_to_fp(fprs, target->thread.fpu.vxrs); else diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 245bddfe9bc0..cc513add48eb 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -22,7 +22,7 @@ KBUILD_AFLAGS_32 += -m31 -s KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin -LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \ +LDFLAGS_vdso32.so.dbg += -shared -soname=linux-vdso32.so.1 \ --hash-style=both --build-id=sha1 -melf_s390 -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 1605ba45ac4c..42d918d50a1f 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -26,7 +26,7 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin -ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \ +ldflags-y := -shared -soname=linux-vdso64.so.1 \ --hash-style=both --build-id=sha1 -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 9436f3053b88..003c926a0f4d 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -210,13 +210,13 @@ void vtime_flush(struct task_struct *tsk) virt_timer_expire(); steal = S390_lowcore.steal_timer; - avg_steal = S390_lowcore.avg_steal_timer / 2; + avg_steal = S390_lowcore.avg_steal_timer; if ((s64) steal > 0) { S390_lowcore.steal_timer = 0; account_steal_time(cputime_to_nsecs(steal)); avg_steal += steal; } - S390_lowcore.avg_steal_timer = avg_steal; + S390_lowcore.avg_steal_timer = avg_steal / 2; } static u64 vtime_delta(void) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 0243b6e38d36..3beceff5f1c0 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1273,6 +1273,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, unsigned long *pgt, int *dat_protection, int *fake) { + struct kvm *kvm; struct gmap *parent; union asce asce; union vaddress vaddr; @@ -1281,6 +1282,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, *fake = 0; *dat_protection = 0; + kvm = sg->private; parent = sg->parent; vaddr.addr = saddr; asce.val = sg->orig_asce; @@ -1341,6 +1343,7 @@ shadow_r2t: rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r1_entry++; } fallthrough; case ASCE_TYPE_REGION2: { @@ -1369,6 +1372,7 @@ shadow_r3t: rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r2_entry++; } fallthrough; case ASCE_TYPE_REGION3: { @@ -1406,6 +1410,7 @@ shadow_sgt: rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r3_entry++; } fallthrough; case ASCE_TYPE_SEGMENT: { @@ -1439,6 +1444,7 @@ shadow_pgt: rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_sg_entry++; } } /* Return the parent address of the page table */ @@ -1509,6 +1515,7 @@ shadow_page: pte.p |= dat_protection; if (!rc) rc = gmap_shadow_page(sg, saddr, __pte(pte.val)); + vcpu->kvm->stat.gmap_shadow_pg_entry++; ipte_unlock(vcpu->kvm); mmap_read_unlock(sg->mm); return rc; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3775363471f0..348d49268a7e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -66,7 +66,14 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_COUNTER(VM, inject_pfault_done), STATS_DESC_COUNTER(VM, inject_service_signal), STATS_DESC_COUNTER(VM, inject_virtio), - STATS_DESC_COUNTER(VM, aen_forward) + STATS_DESC_COUNTER(VM, aen_forward), + STATS_DESC_COUNTER(VM, gmap_shadow_reuse), + STATS_DESC_COUNTER(VM, gmap_shadow_create), + STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry), }; const struct kvm_stats_header kvm_vm_stats_header = { @@ -4138,10 +4145,6 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) vcpu_load(vcpu); - if (test_fp_ctl(fpu->fpc)) { - ret = -EINVAL; - goto out; - } vcpu->run->s.regs.fpc = fpu->fpc; if (MACHINE_HAS_VX) convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, @@ -4149,7 +4152,6 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) else memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); -out: vcpu_put(vcpu); return ret; } diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 740f8b56e63f..d90c818a9ae7 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1206,15 +1206,17 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, * we're holding has been unshadowed. If the gmap is still valid, * we can safely reuse it. */ - if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) + if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) { + vcpu->kvm->stat.gmap_shadow_reuse++; return 0; + } /* release the old shadow - if any, and mark the prefix as unmapped */ release_gmap_shadow(vsie_page); gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); if (IS_ERR(gmap)) return PTR_ERR(gmap); - gmap->private = vcpu->kvm; + vcpu->kvm->stat.gmap_shadow_create++; WRITE_ONCE(vsie_page->gmap, gmap); return 0; } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 243f673fa651..662cf23a1b44 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1675,6 +1675,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, return ERR_PTR(-ENOMEM); new->mm = parent->mm; new->parent = gmap_get(parent); + new->private = parent->private; new->orig_asce = asce; new->edat_level = edat_level; new->initialized = false; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 2c99f9552b2f..394c69fda399 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -241,7 +241,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, /* combine single writes by using store-block insn */ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) { - zpci_memcpy_toio(to, from, count); + zpci_memcpy_toio(to, from, count * 8); } static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot) diff --git a/arch/sparc/crypto/crop_devid.c b/arch/sparc/crypto/crop_devid.c index 83fc4536dcd5..93f4e0fdd38c 100644 --- a/arch/sparc/crypto/crop_devid.c +++ b/arch/sparc/crypto/crop_devid.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include -#include /* This is a dummy device table linked into all of the crypto * opcode drivers. It serves to trigger the module autoloading diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h index e10ab9ad3097..836f6575aa1d 100644 --- a/arch/sparc/include/asm/floppy_32.h +++ b/arch/sparc/include/asm/floppy_32.h @@ -8,7 +8,7 @@ #define __ASM_SPARC_FLOPPY_H #include -#include +#include #include #include diff --git a/arch/sparc/include/asm/floppy_64.h b/arch/sparc/include/asm/floppy_64.h index 070c8c1f5c8f..6efeb24b0a92 100644 --- a/arch/sparc/include/asm/floppy_64.h +++ b/arch/sparc/include/asm/floppy_64.h @@ -11,7 +11,7 @@ #define __ASM_SPARC64_FLOPPY_H #include -#include +#include #include #include diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 94eb529dcb77..2718cbea826a 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -10,7 +10,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" @@ -26,7 +26,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b %l[l_yes]\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" diff --git a/arch/sparc/include/asm/parport.h b/arch/sparc/include/asm/parport.h index 03b27090c0c8..e2eed8f97665 100644 --- a/arch/sparc/include/asm/parport.h +++ b/arch/sparc/include/asm/parport.h @@ -1,255 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* parport.h: sparc64 specific parport initialization and dma. - * - * Copyright (C) 1999 Eddie C. Dost (ecd@skynet.be) - */ +#ifndef ___ASM_SPARC_PARPORT_H +#define ___ASM_SPARC_PARPORT_H -#ifndef _ASM_SPARC64_PARPORT_H -#define _ASM_SPARC64_PARPORT_H 1 - -#include - -#include -#include -#include - -#define PARPORT_PC_MAX_PORTS PARPORT_MAX - -/* - * While sparc64 doesn't have an ISA DMA API, we provide something that looks - * close enough to make parport_pc happy - */ -#define HAS_DMA - -#ifdef CONFIG_PARPORT_PC_FIFO -static DEFINE_SPINLOCK(dma_spin_lock); - -#define claim_dma_lock() \ -({ unsigned long flags; \ - spin_lock_irqsave(&dma_spin_lock, flags); \ - flags; \ -}) - -#define release_dma_lock(__flags) \ - spin_unlock_irqrestore(&dma_spin_lock, __flags); +#if defined(__sparc__) && defined(__arch64__) +#include +#else +#include +#endif #endif -static struct sparc_ebus_info { - struct ebus_dma_info info; - unsigned int addr; - unsigned int count; - int lock; - - struct parport *port; -} sparc_ebus_dmas[PARPORT_PC_MAX_PORTS]; - -static DECLARE_BITMAP(dma_slot_map, PARPORT_PC_MAX_PORTS); - -static inline int request_dma(unsigned int dmanr, const char *device_id) -{ - if (dmanr >= PARPORT_PC_MAX_PORTS) - return -EINVAL; - if (xchg(&sparc_ebus_dmas[dmanr].lock, 1) != 0) - return -EBUSY; - return 0; -} - -static inline void free_dma(unsigned int dmanr) -{ - if (dmanr >= PARPORT_PC_MAX_PORTS) { - printk(KERN_WARNING "Trying to free DMA%d\n", dmanr); - return; - } - if (xchg(&sparc_ebus_dmas[dmanr].lock, 0) == 0) { - printk(KERN_WARNING "Trying to free free DMA%d\n", dmanr); - return; - } -} - -static inline void enable_dma(unsigned int dmanr) -{ - ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 1); - - if (ebus_dma_request(&sparc_ebus_dmas[dmanr].info, - sparc_ebus_dmas[dmanr].addr, - sparc_ebus_dmas[dmanr].count)) - BUG(); -} - -static inline void disable_dma(unsigned int dmanr) -{ - ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 0); -} - -static inline void clear_dma_ff(unsigned int dmanr) -{ - /* nothing */ -} - -static inline void set_dma_mode(unsigned int dmanr, char mode) -{ - ebus_dma_prepare(&sparc_ebus_dmas[dmanr].info, (mode != DMA_MODE_WRITE)); -} - -static inline void set_dma_addr(unsigned int dmanr, unsigned int addr) -{ - sparc_ebus_dmas[dmanr].addr = addr; -} - -static inline void set_dma_count(unsigned int dmanr, unsigned int count) -{ - sparc_ebus_dmas[dmanr].count = count; -} - -static inline unsigned int get_dma_residue(unsigned int dmanr) -{ - return ebus_dma_residue(&sparc_ebus_dmas[dmanr].info); -} - -static int ecpp_probe(struct platform_device *op) -{ - unsigned long base = op->resource[0].start; - unsigned long config = op->resource[1].start; - unsigned long d_base = op->resource[2].start; - unsigned long d_len; - struct device_node *parent; - struct parport *p; - int slot, err; - - parent = op->dev.of_node->parent; - if (of_node_name_eq(parent, "dma")) { - p = parport_pc_probe_port(base, base + 0x400, - op->archdata.irqs[0], PARPORT_DMA_NOFIFO, - op->dev.parent->parent, 0); - if (!p) - return -ENOMEM; - dev_set_drvdata(&op->dev, p); - return 0; - } - - for (slot = 0; slot < PARPORT_PC_MAX_PORTS; slot++) { - if (!test_and_set_bit(slot, dma_slot_map)) - break; - } - err = -ENODEV; - if (slot >= PARPORT_PC_MAX_PORTS) - goto out_err; - - spin_lock_init(&sparc_ebus_dmas[slot].info.lock); - - d_len = (op->resource[2].end - d_base) + 1UL; - sparc_ebus_dmas[slot].info.regs = - of_ioremap(&op->resource[2], 0, d_len, "ECPP DMA"); - - if (!sparc_ebus_dmas[slot].info.regs) - goto out_clear_map; - - sparc_ebus_dmas[slot].info.flags = 0; - sparc_ebus_dmas[slot].info.callback = NULL; - sparc_ebus_dmas[slot].info.client_cookie = NULL; - sparc_ebus_dmas[slot].info.irq = 0xdeadbeef; - strcpy(sparc_ebus_dmas[slot].info.name, "parport"); - if (ebus_dma_register(&sparc_ebus_dmas[slot].info)) - goto out_unmap_regs; - - ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 1); - - /* Configure IRQ to Push Pull, Level Low */ - /* Enable ECP, set bit 2 of the CTR first */ - outb(0x04, base + 0x02); - ns87303_modify(config, PCR, - PCR_EPP_ENABLE | - PCR_IRQ_ODRAIN, - PCR_ECP_ENABLE | - PCR_ECP_CLK_ENA | - PCR_IRQ_POLAR); - - /* CTR bit 5 controls direction of port */ - ns87303_modify(config, PTR, - 0, PTR_LPT_REG_DIR); - - p = parport_pc_probe_port(base, base + 0x400, - op->archdata.irqs[0], - slot, - op->dev.parent, - 0); - err = -ENOMEM; - if (!p) - goto out_disable_irq; - - dev_set_drvdata(&op->dev, p); - - return 0; - -out_disable_irq: - ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0); - ebus_dma_unregister(&sparc_ebus_dmas[slot].info); - -out_unmap_regs: - of_iounmap(&op->resource[2], sparc_ebus_dmas[slot].info.regs, d_len); - -out_clear_map: - clear_bit(slot, dma_slot_map); - -out_err: - return err; -} - -static int ecpp_remove(struct platform_device *op) -{ - struct parport *p = dev_get_drvdata(&op->dev); - int slot = p->dma; - - parport_pc_unregister_port(p); - - if (slot != PARPORT_DMA_NOFIFO) { - unsigned long d_base = op->resource[2].start; - unsigned long d_len; - - d_len = (op->resource[2].end - d_base) + 1UL; - - ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0); - ebus_dma_unregister(&sparc_ebus_dmas[slot].info); - of_iounmap(&op->resource[2], - sparc_ebus_dmas[slot].info.regs, - d_len); - clear_bit(slot, dma_slot_map); - } - - return 0; -} - -static const struct of_device_id ecpp_match[] = { - { - .name = "ecpp", - }, - { - .name = "parallel", - .compatible = "ecpp", - }, - { - .name = "parallel", - .compatible = "ns87317-ecpp", - }, - { - .name = "parallel", - .compatible = "pnpALI,1533,3", - }, - {}, -}; - -static struct platform_driver ecpp_driver = { - .driver = { - .name = "ecpp", - .of_match_table = ecpp_match, - }, - .probe = ecpp_probe, - .remove = ecpp_remove, -}; - -static int parport_pc_find_nonpci_ports(int autoirq, int autodma) -{ - return platform_driver_register(&ecpp_driver); -} - -#endif /* !(_ASM_SPARC64_PARPORT_H */ diff --git a/arch/sparc/include/asm/parport_64.h b/arch/sparc/include/asm/parport_64.h new file mode 100644 index 000000000000..0a7ffcfd59cd --- /dev/null +++ b/arch/sparc/include/asm/parport_64.h @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* parport.h: sparc64 specific parport initialization and dma. + * + * Copyright (C) 1999 Eddie C. Dost (ecd@skynet.be) + */ + +#ifndef _ASM_SPARC64_PARPORT_H +#define _ASM_SPARC64_PARPORT_H 1 + +#include +#include + +#include +#include +#include + +#define PARPORT_PC_MAX_PORTS PARPORT_MAX + +/* + * While sparc64 doesn't have an ISA DMA API, we provide something that looks + * close enough to make parport_pc happy + */ +#define HAS_DMA + +#ifdef CONFIG_PARPORT_PC_FIFO +static DEFINE_SPINLOCK(dma_spin_lock); + +#define claim_dma_lock() \ +({ unsigned long flags; \ + spin_lock_irqsave(&dma_spin_lock, flags); \ + flags; \ +}) + +#define release_dma_lock(__flags) \ + spin_unlock_irqrestore(&dma_spin_lock, __flags); +#endif + +static struct sparc_ebus_info { + struct ebus_dma_info info; + unsigned int addr; + unsigned int count; + int lock; + + struct parport *port; +} sparc_ebus_dmas[PARPORT_PC_MAX_PORTS]; + +static DECLARE_BITMAP(dma_slot_map, PARPORT_PC_MAX_PORTS); + +static inline int request_dma(unsigned int dmanr, const char *device_id) +{ + if (dmanr >= PARPORT_PC_MAX_PORTS) + return -EINVAL; + if (xchg(&sparc_ebus_dmas[dmanr].lock, 1) != 0) + return -EBUSY; + return 0; +} + +static inline void free_dma(unsigned int dmanr) +{ + if (dmanr >= PARPORT_PC_MAX_PORTS) { + printk(KERN_WARNING "Trying to free DMA%d\n", dmanr); + return; + } + if (xchg(&sparc_ebus_dmas[dmanr].lock, 0) == 0) { + printk(KERN_WARNING "Trying to free free DMA%d\n", dmanr); + return; + } +} + +static inline void enable_dma(unsigned int dmanr) +{ + ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 1); + + if (ebus_dma_request(&sparc_ebus_dmas[dmanr].info, + sparc_ebus_dmas[dmanr].addr, + sparc_ebus_dmas[dmanr].count)) + BUG(); +} + +static inline void disable_dma(unsigned int dmanr) +{ + ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 0); +} + +static inline void clear_dma_ff(unsigned int dmanr) +{ + /* nothing */ +} + +static inline void set_dma_mode(unsigned int dmanr, char mode) +{ + ebus_dma_prepare(&sparc_ebus_dmas[dmanr].info, (mode != DMA_MODE_WRITE)); +} + +static inline void set_dma_addr(unsigned int dmanr, unsigned int addr) +{ + sparc_ebus_dmas[dmanr].addr = addr; +} + +static inline void set_dma_count(unsigned int dmanr, unsigned int count) +{ + sparc_ebus_dmas[dmanr].count = count; +} + +static inline unsigned int get_dma_residue(unsigned int dmanr) +{ + return ebus_dma_residue(&sparc_ebus_dmas[dmanr].info); +} + +static int ecpp_probe(struct platform_device *op) +{ + unsigned long base = op->resource[0].start; + unsigned long config = op->resource[1].start; + unsigned long d_base = op->resource[2].start; + unsigned long d_len; + struct device_node *parent; + struct parport *p; + int slot, err; + + parent = op->dev.of_node->parent; + if (of_node_name_eq(parent, "dma")) { + p = parport_pc_probe_port(base, base + 0x400, + op->archdata.irqs[0], PARPORT_DMA_NOFIFO, + op->dev.parent->parent, 0); + if (!p) + return -ENOMEM; + dev_set_drvdata(&op->dev, p); + return 0; + } + + for (slot = 0; slot < PARPORT_PC_MAX_PORTS; slot++) { + if (!test_and_set_bit(slot, dma_slot_map)) + break; + } + err = -ENODEV; + if (slot >= PARPORT_PC_MAX_PORTS) + goto out_err; + + spin_lock_init(&sparc_ebus_dmas[slot].info.lock); + + d_len = (op->resource[2].end - d_base) + 1UL; + sparc_ebus_dmas[slot].info.regs = + of_ioremap(&op->resource[2], 0, d_len, "ECPP DMA"); + + if (!sparc_ebus_dmas[slot].info.regs) + goto out_clear_map; + + sparc_ebus_dmas[slot].info.flags = 0; + sparc_ebus_dmas[slot].info.callback = NULL; + sparc_ebus_dmas[slot].info.client_cookie = NULL; + sparc_ebus_dmas[slot].info.irq = 0xdeadbeef; + strcpy(sparc_ebus_dmas[slot].info.name, "parport"); + if (ebus_dma_register(&sparc_ebus_dmas[slot].info)) + goto out_unmap_regs; + + ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 1); + + /* Configure IRQ to Push Pull, Level Low */ + /* Enable ECP, set bit 2 of the CTR first */ + outb(0x04, base + 0x02); + ns87303_modify(config, PCR, + PCR_EPP_ENABLE | + PCR_IRQ_ODRAIN, + PCR_ECP_ENABLE | + PCR_ECP_CLK_ENA | + PCR_IRQ_POLAR); + + /* CTR bit 5 controls direction of port */ + ns87303_modify(config, PTR, + 0, PTR_LPT_REG_DIR); + + p = parport_pc_probe_port(base, base + 0x400, + op->archdata.irqs[0], + slot, + op->dev.parent, + 0); + err = -ENOMEM; + if (!p) + goto out_disable_irq; + + dev_set_drvdata(&op->dev, p); + + return 0; + +out_disable_irq: + ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0); + ebus_dma_unregister(&sparc_ebus_dmas[slot].info); + +out_unmap_regs: + of_iounmap(&op->resource[2], sparc_ebus_dmas[slot].info.regs, d_len); + +out_clear_map: + clear_bit(slot, dma_slot_map); + +out_err: + return err; +} + +static int ecpp_remove(struct platform_device *op) +{ + struct parport *p = dev_get_drvdata(&op->dev); + int slot = p->dma; + + parport_pc_unregister_port(p); + + if (slot != PARPORT_DMA_NOFIFO) { + unsigned long d_base = op->resource[2].start; + unsigned long d_len; + + d_len = (op->resource[2].end - d_base) + 1UL; + + ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0); + ebus_dma_unregister(&sparc_ebus_dmas[slot].info); + of_iounmap(&op->resource[2], + sparc_ebus_dmas[slot].info.regs, + d_len); + clear_bit(slot, dma_slot_map); + } + + return 0; +} + +static const struct of_device_id ecpp_match[] = { + { + .name = "ecpp", + }, + { + .name = "parallel", + .compatible = "ecpp", + }, + { + .name = "parallel", + .compatible = "ns87317-ecpp", + }, + { + .name = "parallel", + .compatible = "pnpALI,1533,3", + }, + {}, +}; + +static struct platform_driver ecpp_driver = { + .driver = { + .name = "ecpp", + .of_match_table = ecpp_match, + }, + .probe = ecpp_probe, + .remove = ecpp_remove, +}; + +static int parport_pc_find_nonpci_ports(int autoirq, int autodma) +{ + return platform_driver_register(&ecpp_driver); +} + +#endif /* !(_ASM_SPARC64_PARPORT_H */ diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c index ecd05bc0a104..d44725d37e30 100644 --- a/arch/sparc/kernel/apc.c +++ b/arch/sparc/kernel/apc.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/auxio_32.c b/arch/sparc/kernel/auxio_32.c index a32d588174f2..989860e890c4 100644 --- a/arch/sparc/kernel/auxio_32.c +++ b/arch/sparc/kernel/auxio_32.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c index 774a82b0c649..2a2800d21325 100644 --- a/arch/sparc/kernel/auxio_64.c +++ b/arch/sparc/kernel/auxio_64.c @@ -10,7 +10,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/central.c b/arch/sparc/kernel/central.c index 23f8838dd96e..a1a6485c9183 100644 --- a/arch/sparc/kernel/central.c +++ b/arch/sparc/kernel/central.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/chmc.c b/arch/sparc/kernel/chmc.c index 6ff43df740e0..d5fad5fb04c1 100644 --- a/arch/sparc/kernel/chmc.c +++ b/arch/sparc/kernel/chmc.c @@ -15,7 +15,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 4e4f3d3263e4..e5a327799e57 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 39229940d725..4c61da491fee 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -8,9 +8,7 @@ #include #include #include -#include #include -#include #include #include diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index e5e5ff6b9a5c..3a73bc466f95 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c @@ -7,7 +7,8 @@ * Code is partially derived from pcic.c */ -#include +#include +#include #include #include #include diff --git a/arch/sparc/kernel/leon_pci_grpci1.c b/arch/sparc/kernel/leon_pci_grpci1.c index e6935d0ac1ec..b2b639bee068 100644 --- a/arch/sparc/kernel/leon_pci_grpci1.c +++ b/arch/sparc/kernel/leon_pci_grpci1.c @@ -13,10 +13,11 @@ * Contributors: Daniel Hellstrom */ -#include #include #include +#include #include +#include #include #include @@ -696,7 +697,7 @@ err1: return err; } -static const struct of_device_id grpci1_of_match[] __initconst = { +static const struct of_device_id grpci1_of_match[] = { { .name = "GAISLER_PCIFBRG", }, diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index ca22f93d9045..ac2acd62a24e 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -6,12 +6,14 @@ * */ -#include #include #include #include #include #include +#include +#include + #include #include #include @@ -887,7 +889,7 @@ err1: return err; } -static const struct of_device_id grpci2_of_match[] __initconst = { +static const struct of_device_id grpci2_of_match[] = { { .name = "GAISLER_GRPCI2", }, diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 060fff95a305..fbf25e926f67 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -274,7 +274,7 @@ static int __init setup_nmi_watchdog(char *str) if (!strncmp(str, "panic", 5)) panic_on_timeout = 1; - return 0; + return 1; } __setup("nmi_watchdog=", setup_nmi_watchdog); diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c index 4ebf51e6e78e..9ac6853b34c1 100644 --- a/arch/sparc/kernel/of_device_32.c +++ b/arch/sparc/kernel/of_device_32.c @@ -7,8 +7,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index 5a9f86b1d4e7..a8ccd7260fe7 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include #include @@ -9,8 +8,9 @@ #include #include #include -#include +#include #include +#include #include #include "of_device_common.h" diff --git a/arch/sparc/kernel/of_device_common.c b/arch/sparc/kernel/of_device_common.c index e717a56efc5d..a09724381bd4 100644 --- a/arch/sparc/kernel/of_device_common.c +++ b/arch/sparc/kernel/of_device_common.c @@ -1,15 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-only #include #include -#include #include #include #include #include +#include #include #include -#include #include +#include #include "of_device_common.h" diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index cb1ef25116e9..5637b37ba911 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -20,8 +20,9 @@ #include #include #include -#include +#include #include +#include #include #include diff --git a/arch/sparc/kernel/pci_common.c b/arch/sparc/kernel/pci_common.c index 4759ccd542fe..5eeec9ad6845 100644 --- a/arch/sparc/kernel/pci_common.c +++ b/arch/sparc/kernel/pci_common.c @@ -8,7 +8,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/pci_fire.c b/arch/sparc/kernel/pci_fire.c index 0ca08d455e80..0b91bde80fdc 100644 --- a/arch/sparc/kernel/pci_fire.c +++ b/arch/sparc/kernel/pci_fire.c @@ -10,7 +10,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/pci_impl.h b/arch/sparc/kernel/pci_impl.h index 4e3d15189fa9..f31761f51757 100644 --- a/arch/sparc/kernel/pci_impl.h +++ b/arch/sparc/kernel/pci_impl.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c index 9ed11985768e..fc7402948b7b 100644 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@ -5,6 +5,8 @@ */ #include #include +#include +#include #include #include diff --git a/arch/sparc/kernel/pci_psycho.c b/arch/sparc/kernel/pci_psycho.c index f413371da387..1efc98305ec7 100644 --- a/arch/sparc/kernel/pci_psycho.c +++ b/arch/sparc/kernel/pci_psycho.c @@ -13,7 +13,9 @@ #include #include #include -#include +#include +#include +#include #include #include diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 384480971805..0ddef827e0f9 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -15,7 +15,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c index b5c1eb33b951..69a0206e56f0 100644 --- a/arch/sparc/kernel/pmc.c +++ b/arch/sparc/kernel/pmc.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/power.c b/arch/sparc/kernel/power.c index d941875dd718..2f6c909e1755 100644 --- a/arch/sparc/kernel/power.c +++ b/arch/sparc/kernel/power.c @@ -9,7 +9,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/prom_irqtrans.c b/arch/sparc/kernel/prom_irqtrans.c index 28aff1c524b5..426bd08cb2ab 100644 --- a/arch/sparc/kernel/prom_irqtrans.c +++ b/arch/sparc/kernel/prom_irqtrans.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/arch/sparc/kernel/psycho_common.c b/arch/sparc/kernel/psycho_common.c index e90bcb6bad7f..5ee74b4c0cf4 100644 --- a/arch/sparc/kernel/psycho_common.c +++ b/arch/sparc/kernel/psycho_common.c @@ -6,6 +6,7 @@ #include #include #include +#include #include diff --git a/arch/sparc/kernel/sbus.c b/arch/sparc/kernel/sbus.c index 32141e1006c4..0bababf6f2bc 100644 --- a/arch/sparc/kernel/sbus.c +++ b/arch/sparc/kernel/sbus.c @@ -14,7 +14,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 8a08830e4a65..79934beba03a 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index bf3e6d2fe5d9..3afbbe5fba46 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -13,7 +13,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 9e3f6933ca13..14e178bfe33a 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -7,14 +7,15 @@ * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ - + #include #include #include #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c index ae9a86cb6f3d..2b97df0850aa 100644 --- a/arch/sparc/vdso/vma.c +++ b/arch/sparc/vdso/vma.c @@ -449,9 +449,8 @@ static __init int vdso_setup(char *s) unsigned long val; err = kstrtoul(s, 10, &val); - if (err) - return err; - vdso_enabled = val; - return 0; + if (!err) + vdso_enabled = val; + return 1; } __setup("vdso=", vdso_setup); diff --git a/arch/um/Makefile b/arch/um/Makefile index 3dbd0e3b660e..778c50f27399 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -118,7 +118,9 @@ archprepare: $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static -LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie) +ifdef CONFIG_LD_SCRIPT_DYN +LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie +endif LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 3d7836c46507..cabcc501b448 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -204,7 +204,7 @@ static int uml_net_close(struct net_device *dev) return 0; } -static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct uml_net_private *lp = netdev_priv(dev); unsigned long flags; diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h index 4b6d1b526bc1..66fe06db872f 100644 --- a/arch/um/include/asm/cpufeature.h +++ b/arch/um/include/asm/cpufeature.h @@ -75,7 +75,7 @@ extern void setup_clear_cpu_cap(unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto("1: jmp 6f\n" + asm goto("1: jmp 6f\n" "2:\n" ".skip -(((5f-4f) - (2b-1b)) > 0) * " "((5f-4f) - (2b-1b)),0x90\n" diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index d8b8b4f07e42..444bae755b16 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -50,7 +50,7 @@ extern void do_uml_exitcalls(void); * Are we disallowed to sleep? Used to choose between GFP_KERNEL and * GFP_ATOMIC. */ -extern int __cant_sleep(void); +extern int __uml_cant_sleep(void); extern int get_current_pid(void); extern int copy_from_user_proc(void *to, void *from, int size); extern char *uml_strdup(const char *string); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 010bc422a09d..a351c87db248 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -220,7 +220,7 @@ void arch_cpu_idle(void) raw_local_irq_enable(); } -int __cant_sleep(void) { +int __uml_cant_sleep(void) { return in_atomic() || irqs_disabled() || in_interrupt(); /* Is in_interrupt() really needed? */ } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index fddd1dec27e6..3e270da6b6f6 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -432,9 +432,29 @@ static void time_travel_update_time(unsigned long long next, bool idle) time_travel_del_event(&ne); } +static void time_travel_update_time_rel(unsigned long long offs) +{ + unsigned long flags; + + /* + * Disable interrupts before calculating the new time so + * that a real timer interrupt (signal) can't happen at + * a bad time e.g. after we read time_travel_time but + * before we've completed updating the time. + */ + local_irq_save(flags); + time_travel_update_time(time_travel_time + offs, false); + local_irq_restore(flags); +} + void time_travel_ndelay(unsigned long nsec) { - time_travel_update_time(time_travel_time + nsec, false); + /* + * Not strictly needed to use _rel() version since this is + * only used in INFCPU/EXT modes, but it doesn't hurt and + * is more readable too. + */ + time_travel_update_time_rel(nsec); } EXPORT_SYMBOL(time_travel_ndelay); @@ -568,7 +588,11 @@ static void time_travel_set_start(void) #define time_travel_time 0 #define time_travel_ext_waiting 0 -static inline void time_travel_update_time(unsigned long long ns, bool retearly) +static inline void time_travel_update_time(unsigned long long ns, bool idle) +{ +} + +static inline void time_travel_update_time_rel(unsigned long long offs) { } @@ -720,9 +744,7 @@ static u64 timer_read(struct clocksource *cs) */ if (!irqs_disabled() && !in_interrupt() && !in_softirq() && !time_travel_ext_waiting) - time_travel_update_time(time_travel_time + - TIMER_MULTIPLIER, - false); + time_travel_update_time_rel(TIMER_MULTIPLIER); return time_travel_time / TIMER_MULTIPLIER; } diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index b459745f52e2..3cb8ac63be6e 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -46,7 +46,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) unsigned long stack, sp; int pid, fds[2], ret, n; - stack = alloc_stack(0, __cant_sleep()); + stack = alloc_stack(0, __uml_cant_sleep()); if (stack == 0) return -ENOMEM; @@ -70,7 +70,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) data.pre_data = pre_data; data.argv = argv; data.fd = fds[1]; - data.buf = __cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) : + data.buf = __uml_cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) : uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); pid = clone(helper_child, (void *) sp, CLONE_VM, &data); if (pid < 0) { @@ -121,7 +121,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, unsigned long stack, sp; int pid, status, err; - stack = alloc_stack(0, __cant_sleep()); + stack = alloc_stack(0, __uml_cant_sleep()); if (stack == 0) return -ENOMEM; diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index fc0f2a9dee5a..1dca4ffbd572 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -173,23 +173,38 @@ __uml_setup("quiet", quiet_cmd_param, "quiet\n" " Turns off information messages during boot.\n\n"); +/* + * The os_info/os_warn functions will be called by helper threads. These + * have a very limited stack size and using the libc formatting functions + * may overflow the stack. + * So pull in the kernel vscnprintf and use that instead with a fixed + * on-stack buffer. + */ +int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); + void os_info(const char *fmt, ...) { + char buf[256]; va_list list; + int len; if (quiet_info) return; va_start(list, fmt); - vfprintf(stderr, fmt, list); + len = vscnprintf(buf, sizeof(buf), fmt, list); + fwrite(buf, len, 1, stderr); va_end(list); } void os_warn(const char *fmt, ...) { + char buf[256]; va_list list; + int len; va_start(list, fmt); - vfprintf(stderr, fmt, list); + len = vscnprintf(buf, sizeof(buf), fmt, list); + fwrite(buf, len, 1, stderr); va_end(list); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4c9bfc4be58d..bea53385d31e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1553,19 +1553,6 @@ config AMD_MEM_ENCRYPT This requires an AMD processor that supports Secure Memory Encryption (SME). -config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT - bool "Activate AMD Secure Memory Encryption (SME) by default" - depends on AMD_MEM_ENCRYPT - help - Say yes to have system memory encrypted by default if running on - an AMD processor that supports Secure Memory Encryption (SME). - - If set to Y, then the encryption of system memory can be - deactivated with the mem_encrypt=off command line option. - - If set to N, then the encryption of system memory can be - activated with the mem_encrypt=on command line option. - # Common NUMA Features config NUMA bool "NUMA Memory Allocation and Scheduler Support" @@ -1982,6 +1969,23 @@ config EFI_STUB See Documentation/admin-guide/efi-stub.rst for more information. +config EFI_HANDOVER_PROTOCOL + bool "EFI handover protocol (DEPRECATED)" + depends on EFI_STUB + default y + help + Select this in order to include support for the deprecated EFI + handover protocol, which defines alternative entry points into the + EFI stub. This is a practice that has no basis in the UEFI + specification, and requires a priori knowledge on the part of the + bootloader about Linux/x86 specific ways of passing the command line + and initrd, and where in memory those assets may be loaded. + + If in doubt, say Y. Even though the corresponding support is not + present in upstream GRUB or other bootloaders, most distros build + GRUB with numerous downstream patches applied, and may rely on the + handover protocol as as result. + config EFI_MIXED bool "EFI mixed-mode support" depends on EFI_STUB && X86_64 @@ -2548,6 +2552,17 @@ config GDS_FORCE_MITIGATION If in doubt, say N. +config MITIGATION_RFDS + bool "RFDS Mitigation" + depends on CPU_SUP_INTEL + default y + help + Enable mitigation for Register File Data Sampling (RFDS) by default. + RFDS is a hardware vulnerability which affects Intel Atom CPUs. It + allows unprivileged speculative access to stale data previously + stored in floating point, vector and integer registers. + See also + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 542377cd419d..ce5ed2c2db0c 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -375,7 +375,7 @@ config X86_CMOV config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 - default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8) + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8) default "5" if X86_32 && X86_CMPXCHG64 default "4" diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 15b7b403a4bd..3965b2c9efee 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack ifeq ($(CONFIG_LD_IS_BFD),y) LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments) endif +ifeq ($(CONFIG_EFI_STUB),y) +# ensure that the static EFI stub library will be pulled in, even if it is +# never referenced explicitly from the startup code +LDFLAGS_vmlinux += -u efi_pe_entry +endif LDFLAGS_vmlinux += -T hostprogs := mkpiggy @@ -100,7 +105,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o ifdef CONFIG_X86_64 vmlinux-objs-y += $(obj)/ident_map_64.o vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o - vmlinux-objs-y += $(obj)/mem_encrypt.o + vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/pgtable_64.o vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o endif @@ -108,11 +113,11 @@ endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o -vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o -efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a +vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o +vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a -$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE +$(obj)/vmlinux: $(vmlinux-objs-y) FORCE $(call if_changed,ld) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 9caf89063e77..55c98fdd67d2 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -30,13 +30,13 @@ __efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len) * Search EFI system tables for RSDP. Preferred is ACPI_20_TABLE_GUID to * ACPI_TABLE_GUID because it has more features. */ - rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, + rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len, ACPI_20_TABLE_GUID); if (rsdp_addr) return (acpi_physical_address)rsdp_addr; /* No ACPI_20_TABLE_GUID found, fallback to ACPI_TABLE_GUID. */ - rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, + rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len, ACPI_TABLE_GUID); if (rsdp_addr) return (acpi_physical_address)rsdp_addr; @@ -56,15 +56,15 @@ static acpi_physical_address efi_get_rsdp_addr(void) enum efi_type et; int ret; - et = efi_get_type(boot_params); + et = efi_get_type(boot_params_ptr); if (et == EFI_TYPE_NONE) return 0; - systab_pa = efi_get_system_table(boot_params); + systab_pa = efi_get_system_table(boot_params_ptr); if (!systab_pa) error("EFI support advertised, but unable to locate system table."); - ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len); + ret = efi_get_conf_table(boot_params_ptr, &cfg_tbl_pa, &cfg_tbl_len); if (ret || !cfg_tbl_pa) error("EFI config table not found."); @@ -156,7 +156,7 @@ acpi_physical_address get_rsdp_addr(void) { acpi_physical_address pa; - pa = boot_params->acpi_rsdp_addr; + pa = boot_params_ptr->acpi_rsdp_addr; if (!pa) pa = efi_get_rsdp_addr(); @@ -210,7 +210,7 @@ static unsigned long get_acpi_srat_table(void) rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp(); if (!rsdp) rsdp = (struct acpi_table_rsdp *)(long) - boot_params->acpi_rsdp_addr; + boot_params_ptr->acpi_rsdp_addr; if (!rsdp) return 0; diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index f1add5d85da9..c1bb180973ea 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -14,9 +14,9 @@ static inline char rdfs8(addr_t addr) #include "../cmdline.c" unsigned long get_cmd_line_ptr(void) { - unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr; + unsigned long cmd_line_ptr = boot_params_ptr->hdr.cmd_line_ptr; - cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32; + cmd_line_ptr |= (u64)boot_params_ptr->ext_cmd_line_ptr << 32; return cmd_line_ptr; } diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S new file mode 100644 index 000000000000..fb6d60dcd6ed --- /dev/null +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -0,0 +1,347 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming + * + * Early support for invoking 32-bit EFI services from a 64-bit kernel. + * + * Because this thunking occurs before ExitBootServices() we have to + * restore the firmware's 32-bit GDT and IDT before we make EFI service + * calls. + * + * On the plus side, we don't have to worry about mangling 64-bit + * addresses into 32-bits because we're executing with an identity + * mapped pagetable and haven't transitioned to 64-bit virtual addresses + * yet. + */ + +#include +#include +#include +#include +#include +#include +#include + + .code64 + .text +/* + * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode() + * is the first thing that runs after switching to long mode. Depending on + * whether the EFI handover protocol or the compat entry point was used to + * enter the kernel, it will either branch to the common 64-bit EFI stub + * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF + * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a + * struct bootparams pointer as the third argument, so the presence of such a + * pointer is used to disambiguate. + * + * +--------------+ + * +------------------+ +------------+ +------>| efi_pe_entry | + * | efi32_pe_entry |---->| | | +-----------+--+ + * +------------------+ | | +------+----------------+ | + * | startup_32 |---->| startup_64_mixed_mode | | + * +------------------+ | | +------+----------------+ | + * | efi32_stub_entry |---->| | | | + * +------------------+ +------------+ | | + * V | + * +------------+ +----------------+ | + * | startup_64 |<----| efi_stub_entry |<--------+ + * +------------+ +----------------+ + */ +SYM_FUNC_START(startup_64_mixed_mode) + lea efi32_boot_args(%rip), %rdx + mov 0(%rdx), %edi + mov 4(%rdx), %esi + + /* Switch to the firmware's stack */ + movl efi32_boot_sp(%rip), %esp + andl $~7, %esp + +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL + mov 8(%rdx), %edx // saved bootparams pointer + test %edx, %edx + jnz efi_stub_entry +#endif + /* + * efi_pe_entry uses MS calling convention, which requires 32 bytes of + * shadow space on the stack even if all arguments are passed in + * registers. We also need an additional 8 bytes for the space that + * would be occupied by the return address, and this also results in + * the correct stack alignment for entry. + */ + sub $40, %rsp + mov %rdi, %rcx // MS calling convention + mov %rsi, %rdx + jmp efi_pe_entry +SYM_FUNC_END(startup_64_mixed_mode) + +SYM_FUNC_START(__efi64_thunk) + push %rbp + push %rbx + + movl %ds, %eax + push %rax + movl %es, %eax + push %rax + movl %ss, %eax + push %rax + + /* Copy args passed on stack */ + movq 0x30(%rsp), %rbp + movq 0x38(%rsp), %rbx + movq 0x40(%rsp), %rax + + /* + * Convert x86-64 ABI params to i386 ABI + */ + subq $64, %rsp + movl %esi, 0x0(%rsp) + movl %edx, 0x4(%rsp) + movl %ecx, 0x8(%rsp) + movl %r8d, 0xc(%rsp) + movl %r9d, 0x10(%rsp) + movl %ebp, 0x14(%rsp) + movl %ebx, 0x18(%rsp) + movl %eax, 0x1c(%rsp) + + leaq 0x20(%rsp), %rbx + sgdt (%rbx) + sidt 16(%rbx) + + leaq 1f(%rip), %rbp + + /* + * Switch to IDT and GDT with 32-bit segments. These are the firmware + * GDT and IDT that were installed when the kernel started executing. + * The pointers were saved by the efi32_entry() routine below. + * + * Pass the saved DS selector to the 32-bit code, and use far return to + * restore the saved CS selector. + */ + lidt efi32_boot_idt(%rip) + lgdt efi32_boot_gdt(%rip) + + movzwl efi32_boot_ds(%rip), %edx + movzwq efi32_boot_cs(%rip), %rax + pushq %rax + leaq efi_enter32(%rip), %rax + pushq %rax + lretq + +1: addq $64, %rsp + movq %rdi, %rax + + pop %rbx + movl %ebx, %ss + pop %rbx + movl %ebx, %es + pop %rbx + movl %ebx, %ds + /* Clear out 32-bit selector from FS and GS */ + xorl %ebx, %ebx + movl %ebx, %fs + movl %ebx, %gs + + /* + * Convert 32-bit status code into 64-bit. + */ + roll $1, %eax + rorq $1, %rax + + pop %rbx + pop %rbp + RET +SYM_FUNC_END(__efi64_thunk) + + .code32 +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL +SYM_FUNC_START(efi32_stub_entry) + call 1f +1: popl %ecx + leal (efi32_boot_args - 1b)(%ecx), %ebx + + /* Clear BSS */ + xorl %eax, %eax + leal (_bss - 1b)(%ecx), %edi + leal (_ebss - 1b)(%ecx), %ecx + subl %edi, %ecx + shrl $2, %ecx + cld + rep stosl + + add $0x4, %esp /* Discard return address */ + popl %ecx + popl %edx + popl %esi + movl %esi, 8(%ebx) + jmp efi32_entry +SYM_FUNC_END(efi32_stub_entry) +#endif + +/* + * EFI service pointer must be in %edi. + * + * The stack should represent the 32-bit calling convention. + */ +SYM_FUNC_START_LOCAL(efi_enter32) + /* Load firmware selector into data and stack segment registers */ + movl %edx, %ds + movl %edx, %es + movl %edx, %fs + movl %edx, %gs + movl %edx, %ss + + /* Reload pgtables */ + movl %cr3, %eax + movl %eax, %cr3 + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + /* Disable long mode via EFER */ + movl $MSR_EFER, %ecx + rdmsr + btrl $_EFER_LME, %eax + wrmsr + + call *%edi + + /* We must preserve return value */ + movl %eax, %edi + + /* + * Some firmware will return with interrupts enabled. Be sure to + * disable them before we switch GDTs and IDTs. + */ + cli + + lidtl 16(%ebx) + lgdtl (%ebx) + + movl %cr4, %eax + btsl $(X86_CR4_PAE_BIT), %eax + movl %eax, %cr4 + + movl %cr3, %eax + movl %eax, %cr3 + + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + xorl %eax, %eax + lldt %ax + + pushl $__KERNEL_CS + pushl %ebp + + /* Enable paging */ + movl %cr0, %eax + btsl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + lret +SYM_FUNC_END(efi_enter32) + +/* + * This is the common EFI stub entry point for mixed mode. + * + * Arguments: %ecx image handle + * %edx EFI system table pointer + * + * Since this is the point of no return for ordinary execution, no registers + * are considered live except for the function parameters. [Note that the EFI + * stub may still exit and return to the firmware using the Exit() EFI boot + * service.] + */ +SYM_FUNC_START_LOCAL(efi32_entry) + call 1f +1: pop %ebx + + /* Save firmware GDTR and code/data selectors */ + sgdtl (efi32_boot_gdt - 1b)(%ebx) + movw %cs, (efi32_boot_cs - 1b)(%ebx) + movw %ds, (efi32_boot_ds - 1b)(%ebx) + + /* Store firmware IDT descriptor */ + sidtl (efi32_boot_idt - 1b)(%ebx) + + /* Store firmware stack pointer */ + movl %esp, (efi32_boot_sp - 1b)(%ebx) + + /* Store boot arguments */ + leal (efi32_boot_args - 1b)(%ebx), %ebx + movl %ecx, 0(%ebx) + movl %edx, 4(%ebx) + movb $0x0, 12(%ebx) // efi_is64 + + /* + * Allocate some memory for a temporary struct boot_params, which only + * needs the minimal pieces that startup_32() relies on. + */ + subl $PARAM_SIZE, %esp + movl %esp, %esi + movl $PAGE_SIZE, BP_kernel_alignment(%esi) + movl $_end - 1b, BP_init_size(%esi) + subl $startup_32 - 1b, BP_init_size(%esi) + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + jmp startup_32 +SYM_FUNC_END(efi32_entry) + +/* + * efi_status_t efi32_pe_entry(efi_handle_t image_handle, + * efi_system_table_32_t *sys_table) + */ +SYM_FUNC_START(efi32_pe_entry) + pushl %ebp + movl %esp, %ebp + pushl %ebx // save callee-save registers + pushl %edi + + call verify_cpu // check for long mode support + testl %eax, %eax + movl $0x80000003, %eax // EFI_UNSUPPORTED + jnz 2f + + movl 8(%ebp), %ecx // image_handle + movl 12(%ebp), %edx // sys_table + jmp efi32_entry // pass %ecx, %edx + // no other registers remain live + +2: popl %edi // restore callee-save registers + popl %ebx + leave + RET +SYM_FUNC_END(efi32_pe_entry) + +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL + .org efi32_stub_entry + 0x200 + .code64 +SYM_FUNC_START_NOALIGN(efi64_stub_entry) + jmp efi_handover_entry +SYM_FUNC_END(efi64_stub_entry) +#endif + + .data + .balign 8 +SYM_DATA_START_LOCAL(efi32_boot_gdt) + .word 0 + .quad 0 +SYM_DATA_END(efi32_boot_gdt) + +SYM_DATA_START_LOCAL(efi32_boot_idt) + .word 0 + .quad 0 +SYM_DATA_END(efi32_boot_idt) + +SYM_DATA_LOCAL(efi32_boot_cs, .word 0) +SYM_DATA_LOCAL(efi32_boot_ds, .word 0) +SYM_DATA_LOCAL(efi32_boot_sp, .long 0) +SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) +SYM_DATA(efi_is64, .byte 1) diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S deleted file mode 100644 index 67e7edcdfea8..000000000000 --- a/arch/x86/boot/compressed/efi_thunk_64.S +++ /dev/null @@ -1,195 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming - * - * Early support for invoking 32-bit EFI services from a 64-bit kernel. - * - * Because this thunking occurs before ExitBootServices() we have to - * restore the firmware's 32-bit GDT and IDT before we make EFI service - * calls. - * - * On the plus side, we don't have to worry about mangling 64-bit - * addresses into 32-bits because we're executing with an identity - * mapped pagetable and haven't transitioned to 64-bit virtual addresses - * yet. - */ - -#include -#include -#include -#include -#include - - .code64 - .text -SYM_FUNC_START(__efi64_thunk) - push %rbp - push %rbx - - movl %ds, %eax - push %rax - movl %es, %eax - push %rax - movl %ss, %eax - push %rax - - /* Copy args passed on stack */ - movq 0x30(%rsp), %rbp - movq 0x38(%rsp), %rbx - movq 0x40(%rsp), %rax - - /* - * Convert x86-64 ABI params to i386 ABI - */ - subq $64, %rsp - movl %esi, 0x0(%rsp) - movl %edx, 0x4(%rsp) - movl %ecx, 0x8(%rsp) - movl %r8d, 0xc(%rsp) - movl %r9d, 0x10(%rsp) - movl %ebp, 0x14(%rsp) - movl %ebx, 0x18(%rsp) - movl %eax, 0x1c(%rsp) - - leaq 0x20(%rsp), %rbx - sgdt (%rbx) - - addq $16, %rbx - sidt (%rbx) - - leaq 1f(%rip), %rbp - - /* - * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT - * and IDT that was installed when the kernel started executing. The - * pointers were saved at the EFI stub entry point in head_64.S. - * - * Pass the saved DS selector to the 32-bit code, and use far return to - * restore the saved CS selector. - */ - leaq efi32_boot_idt(%rip), %rax - lidt (%rax) - leaq efi32_boot_gdt(%rip), %rax - lgdt (%rax) - - movzwl efi32_boot_ds(%rip), %edx - movzwq efi32_boot_cs(%rip), %rax - pushq %rax - leaq efi_enter32(%rip), %rax - pushq %rax - lretq - -1: addq $64, %rsp - movq %rdi, %rax - - pop %rbx - movl %ebx, %ss - pop %rbx - movl %ebx, %es - pop %rbx - movl %ebx, %ds - /* Clear out 32-bit selector from FS and GS */ - xorl %ebx, %ebx - movl %ebx, %fs - movl %ebx, %gs - - /* - * Convert 32-bit status code into 64-bit. - */ - roll $1, %eax - rorq $1, %rax - - pop %rbx - pop %rbp - RET -SYM_FUNC_END(__efi64_thunk) - - .code32 -/* - * EFI service pointer must be in %edi. - * - * The stack should represent the 32-bit calling convention. - */ -SYM_FUNC_START_LOCAL(efi_enter32) - /* Load firmware selector into data and stack segment registers */ - movl %edx, %ds - movl %edx, %es - movl %edx, %fs - movl %edx, %gs - movl %edx, %ss - - /* Reload pgtables */ - movl %cr3, %eax - movl %eax, %cr3 - - /* Disable paging */ - movl %cr0, %eax - btrl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - - /* Disable long mode via EFER */ - movl $MSR_EFER, %ecx - rdmsr - btrl $_EFER_LME, %eax - wrmsr - - call *%edi - - /* We must preserve return value */ - movl %eax, %edi - - /* - * Some firmware will return with interrupts enabled. Be sure to - * disable them before we switch GDTs and IDTs. - */ - cli - - lidtl (%ebx) - subl $16, %ebx - - lgdtl (%ebx) - - movl %cr4, %eax - btsl $(X86_CR4_PAE_BIT), %eax - movl %eax, %cr4 - - movl %cr3, %eax - movl %eax, %cr3 - - movl $MSR_EFER, %ecx - rdmsr - btsl $_EFER_LME, %eax - wrmsr - - xorl %eax, %eax - lldt %ax - - pushl $__KERNEL_CS - pushl %ebp - - /* Enable paging */ - movl %cr0, %eax - btsl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - lret -SYM_FUNC_END(efi_enter32) - - .data - .balign 8 -SYM_DATA_START(efi32_boot_gdt) - .word 0 - .quad 0 -SYM_DATA_END(efi32_boot_gdt) - -SYM_DATA_START(efi32_boot_idt) - .word 0 - .quad 0 -SYM_DATA_END(efi32_boot_idt) - -SYM_DATA_START(efi32_boot_cs) - .word 0 -SYM_DATA_END(efi32_boot_cs) - -SYM_DATA_START(efi32_boot_ds) - .word 0 -SYM_DATA_END(efi32_boot_ds) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 3b354eb9516d..1cfe9802a42f 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32) #ifdef CONFIG_RELOCATABLE leal startup_32@GOTOFF(%edx), %ebx - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32() will be at an - * offset to the start of the space allocated for the image. efi_pe_entry() will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - subl image_offset@GOTOFF(%edx), %ebx -#endif - movl BP_kernel_alignment(%esi), %eax decl %eax addl %eax, %ebx @@ -150,17 +137,6 @@ SYM_FUNC_START(startup_32) jmp *%eax SYM_FUNC_END(startup_32) -#ifdef CONFIG_EFI_STUB -SYM_FUNC_START(efi32_stub_entry) - add $0x4, %esp - movl 8(%esp), %esi /* save boot_params pointer */ - call efi_main - /* efi_main returns the possibly relocated address of startup_32 */ - jmp *%eax -SYM_FUNC_END(efi32_stub_entry) -SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) -#endif - .text SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) @@ -179,15 +155,9 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) */ /* push arguments for extract_kernel: */ - pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */ pushl %ebp /* output address */ - pushl input_len@GOTOFF(%ebx) /* input_len */ - leal input_data@GOTOFF(%ebx), %eax - pushl %eax /* input_data */ - leal boot_heap@GOTOFF(%ebx), %eax - pushl %eax /* heap area */ pushl %esi /* real mode pointer */ - call extract_kernel /* returns kernel location in %eax */ + call extract_kernel /* returns kernel entry point in %eax */ addl $24, %esp /* @@ -208,17 +178,11 @@ SYM_DATA_START_LOCAL(gdt) .quad 0x00cf92000000ffff /* __KERNEL_DS */ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) -#ifdef CONFIG_EFI_STUB -SYM_DATA(image_offset, .long 0) -#endif - /* * Stack and heap for uncompression */ .bss .balign 4 -boot_heap: - .fill BOOT_HEAP_SIZE, 1, 0 boot_stack: .fill BOOT_STACK_SIZE, 1, 0 boot_stack_end: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index b4bd6df29116..0d7aef10b19a 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -118,7 +118,9 @@ SYM_FUNC_START(startup_32) 1: /* Setup Exception handling for SEV-ES */ +#ifdef CONFIG_AMD_MEM_ENCRYPT call startup32_load_idt +#endif /* Make sure cpu supports long mode. */ call verify_cpu @@ -136,19 +138,6 @@ SYM_FUNC_START(startup_32) #ifdef CONFIG_RELOCATABLE movl %ebp, %ebx - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32 will be at an - * offset to the start of the space allocated for the image. efi_pe_entry will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - subl rva(image_offset)(%ebp), %ebx -#endif - movl BP_kernel_alignment(%esi), %eax decl %eax addl %eax, %ebx @@ -178,12 +167,13 @@ SYM_FUNC_START(startup_32) */ /* * If SEV is active then set the encryption mask in the page tables. - * This will insure that when the kernel is copied and decompressed + * This will ensure that when the kernel is copied and decompressed * it will be done so encrypted. */ - call get_sev_encryption_bit xorl %edx, %edx #ifdef CONFIG_AMD_MEM_ENCRYPT + call get_sev_encryption_bit + xorl %edx, %edx testl %eax, %eax jz 1f subl $32, %eax /* Encryption bit is always above bit 31 */ @@ -249,6 +239,11 @@ SYM_FUNC_START(startup_32) movl $__BOOT_TSS, %eax ltr %ax +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* Check if the C-bit position is correct when SEV is active */ + call startup32_check_sev_cbit +#endif + /* * Setup for the jump to 64bit mode * @@ -261,29 +256,11 @@ SYM_FUNC_START(startup_32) */ leal rva(startup_64)(%ebp), %eax #ifdef CONFIG_EFI_MIXED - movl rva(efi32_boot_args)(%ebp), %edi - testl %edi, %edi - jz 1f - leal rva(efi64_stub_entry)(%ebp), %eax - movl rva(efi32_boot_args+4)(%ebp), %esi - movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer - testl %edx, %edx - jnz 1f - /* - * efi_pe_entry uses MS calling convention, which requires 32 bytes of - * shadow space on the stack even if all arguments are passed in - * registers. We also need an additional 8 bytes for the space that - * would be occupied by the return address, and this also results in - * the correct stack alignment for entry. - */ - subl $40, %esp - leal rva(efi_pe_entry)(%ebp), %eax - movl %edi, %ecx // MS calling convention - movl %esi, %edx + cmpb $1, rva(efi_is64)(%ebp) + je 1f + leal rva(startup_64_mixed_mode)(%ebp), %eax 1: #endif - /* Check if the C-bit position is correct when SEV is active */ - call startup32_check_sev_cbit pushl $__KERNEL_CS pushl %eax @@ -296,41 +273,6 @@ SYM_FUNC_START(startup_32) lret SYM_FUNC_END(startup_32) -#ifdef CONFIG_EFI_MIXED - .org 0x190 -SYM_FUNC_START(efi32_stub_entry) - add $0x4, %esp /* Discard return address */ - popl %ecx - popl %edx - popl %esi - - call 1f -1: pop %ebp - subl $ rva(1b), %ebp - - movl %esi, rva(efi32_boot_args+8)(%ebp) -SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL) - movl %ecx, rva(efi32_boot_args)(%ebp) - movl %edx, rva(efi32_boot_args+4)(%ebp) - movb $0, rva(efi_is64)(%ebp) - - /* Save firmware GDTR and code/data selectors */ - sgdtl rva(efi32_boot_gdt)(%ebp) - movw %cs, rva(efi32_boot_cs)(%ebp) - movw %ds, rva(efi32_boot_ds)(%ebp) - - /* Store firmware IDT descriptor */ - sidtl rva(efi32_boot_idt)(%ebp) - - /* Disable paging */ - movl %cr0, %eax - btrl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - - jmp startup_32 -SYM_FUNC_END(efi32_stub_entry) -#endif - .code64 .org 0x200 SYM_CODE_START(startup_64) @@ -372,20 +314,6 @@ SYM_CODE_START(startup_64) /* Start with the delta to where the kernel will run at. */ #ifdef CONFIG_RELOCATABLE leaq startup_32(%rip) /* - $startup_32 */, %rbp - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32 will be at an - * offset to the start of the space allocated for the image. efi_pe_entry will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - movl image_offset(%rip), %eax - subq %rax, %rbp -#endif - movl BP_kernel_alignment(%rsi), %eax decl %eax addq %rax, %rbp @@ -424,10 +352,6 @@ SYM_CODE_START(startup_64) * For the trampoline, we need the top page table to reside in lower * memory as we don't have a way to load 64-bit values into CR3 in * 32-bit mode. - * - * We go though the trampoline even if we don't have to: if we're - * already in a desired paging mode. This way the trampoline code gets - * tested on every boot. */ /* Make sure we have GDT with 32-bit code segment */ @@ -442,10 +366,14 @@ SYM_CODE_START(startup_64) lretq .Lon_kernel_cs: + /* + * RSI holds a pointer to a boot_params structure provided by the + * loader, and this needs to be preserved across C function calls. So + * move it into a callee saved register. + */ + movq %rsi, %r15 - pushq %rsi call load_stage1_idt - popq %rsi #ifdef CONFIG_AMD_MEM_ENCRYPT /* @@ -456,82 +384,24 @@ SYM_CODE_START(startup_64) * CPUID instructions being issued, so go ahead and do that now via * sev_enable(), which will also handle the rest of the SEV-related * detection/setup to ensure that has been done in advance of any dependent - * code. + * code. Pass the boot_params pointer as the first argument. */ - pushq %rsi - movq %rsi, %rdi /* real mode address */ + movq %r15, %rdi call sev_enable - popq %rsi #endif /* - * paging_prepare() sets up the trampoline and checks if we need to - * enable 5-level paging. + * configure_5level_paging() updates the number of paging levels using + * a trampoline in 32-bit addressable memory if the current number does + * not match the desired number. * - * paging_prepare() returns a two-quadword structure which lands - * into RDX:RAX: - * - Address of the trampoline is returned in RAX. - * - Non zero RDX means trampoline needs to enable 5-level - * paging. - * - * RSI holds real mode data and needs to be preserved across - * this function call. + * Pass the boot_params pointer as the first argument. The second + * argument is the relocated address of the page table to use instead + * of the page table in trampoline memory (if required). */ - pushq %rsi - movq %rsi, %rdi /* real mode address */ - call paging_prepare - popq %rsi - - /* Save the trampoline address in RCX */ - movq %rax, %rcx - - /* Set up 32-bit addressable stack */ - leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp - - /* - * Preserve live 64-bit registers on the stack: this is necessary - * because the architecture does not guarantee that GPRs will retain - * their full 64-bit values across a 32-bit mode switch. - */ - pushq %rbp - pushq %rbx - pushq %rsi - - /* - * Push the 64-bit address of trampoline_return() onto the new stack. - * It will be used by the trampoline to return to the main code. Due to - * the 32-bit mode switch, it cannot be kept it in a register either. - */ - leaq trampoline_return(%rip), %rdi - pushq %rdi - - /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ - pushq $__KERNEL32_CS - leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax - pushq %rax - lretq -trampoline_return: - /* Restore live 64-bit registers */ - popq %rsi - popq %rbx - popq %rbp - - /* Restore the stack, the 32-bit trampoline uses its own stack */ - leaq rva(boot_stack_end)(%rbx), %rsp - - /* - * cleanup_trampoline() would restore trampoline memory. - * - * RDI is address of the page table to use instead of page table - * in trampoline memory (if required). - * - * RSI holds real mode data and needs to be preserved across - * this function call. - */ - pushq %rsi - leaq rva(top_pgtable)(%rbx), %rdi - call cleanup_trampoline - popq %rsi + movq %r15, %rdi + leaq rva(top_pgtable)(%rbx), %rsi + call configure_5level_paging /* Zero EFLAGS */ pushq $0 @@ -541,7 +411,6 @@ trampoline_return: * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - pushq %rsi leaq (_bss-8)(%rip), %rsi leaq rva(_bss-8)(%rbx), %rdi movl $(_bss - startup_32), %ecx @@ -549,7 +418,6 @@ trampoline_return: std rep movsq cld - popq %rsi /* * The GDT may get overwritten either during the copy we just did or @@ -568,19 +436,6 @@ trampoline_return: jmp *%rax SYM_CODE_END(startup_64) -#ifdef CONFIG_EFI_STUB - .org 0x390 -SYM_FUNC_START(efi64_stub_entry) - and $~0xf, %rsp /* realign the stack */ - movq %rdx, %rbx /* save boot_params pointer */ - call efi_main - movq %rbx,%rsi - leaq rva(startup_64)(%rax), %rax - jmp *%rax -SYM_FUNC_END(efi64_stub_entry) -SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) -#endif - .text SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) @@ -594,125 +449,122 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) shrq $3, %rcx rep stosq - pushq %rsi call load_stage2_idt /* Pass boot_params to initialize_identity_maps() */ - movq (%rsp), %rdi + movq %r15, %rdi call initialize_identity_maps - popq %rsi /* * Do the extraction, and jump to the new kernel.. */ - pushq %rsi /* Save the real mode argument */ - movq %rsi, %rdi /* real mode address */ - leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ - leaq input_data(%rip), %rdx /* input_data */ - movl input_len(%rip), %ecx /* input_len */ - movq %rbp, %r8 /* output target address */ - movl output_len(%rip), %r9d /* decompressed length, end of relocs */ - call extract_kernel /* returns kernel location in %rax */ - popq %rsi + /* pass struct boot_params pointer and output target address */ + movq %r15, %rdi + movq %rbp, %rsi + call extract_kernel /* returns kernel entry point in %rax */ /* * Jump to the decompressed kernel. */ + movq %r15, %rsi jmp *%rax SYM_FUNC_END(.Lrelocated) - .code32 /* - * This is the 32-bit trampoline that will be copied over to low memory. + * This is the 32-bit trampoline that will be copied over to low memory. It + * will be called using the ordinary 64-bit calling convention from code + * running in 64-bit mode. * * Return address is at the top of the stack (might be above 4G). - * ECX contains the base address of the trampoline memory. - * Non zero RDX means trampoline needs to enable 5-level paging. + * The first argument (EDI) contains the address of the temporary PGD level + * page table in 32-bit addressable memory which will be programmed into + * register CR3. */ + .section ".rodata", "a", @progbits SYM_CODE_START(trampoline_32bit_src) - /* Set up data and stack segments */ - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %ss + /* + * Preserve callee save 64-bit registers on the stack: this is + * necessary because the architecture does not guarantee that GPRs will + * retain their full 64-bit values across a 32-bit mode switch. + */ + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbp + pushq %rbx + /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */ + movq %rsp, %rbx + shrq $32, %rbx + + /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ + pushq $__KERNEL32_CS + leaq 0f(%rip), %rax + pushq %rax + lretq + + /* + * The 32-bit code below will do a far jump back to long mode and end + * up here after reconfiguring the number of paging levels. First, the + * stack pointer needs to be restored to its full 64-bit value before + * the callee save register contents can be popped from the stack. + */ +.Lret: + shlq $32, %rbx + orq %rbx, %rsp + + /* Restore the preserved 64-bit registers */ + popq %rbx + popq %rbp + popq %r12 + popq %r13 + popq %r14 + popq %r15 + retq + + .code32 +0: /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax movl %eax, %cr0 - /* Check what paging mode we want to be in after the trampoline */ - testl %edx, %edx - jz 1f - - /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */ - movl %cr4, %eax - testl $X86_CR4_LA57, %eax - jnz 3f - jmp 2f -1: - /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */ - movl %cr4, %eax - testl $X86_CR4_LA57, %eax - jz 3f -2: /* Point CR3 to the trampoline's new top level page table */ - leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax - movl %eax, %cr3 -3: + movl %edi, %cr3 + /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ - pushl %ecx - pushl %edx movl $MSR_EFER, %ecx rdmsr btsl $_EFER_LME, %eax /* Avoid writing EFER if no change was made (for TDX guest) */ jc 1f wrmsr -1: popl %edx - popl %ecx - -#ifdef CONFIG_X86_MCE - /* - * Preserve CR4.MCE if the kernel will enable #MC support. - * Clearing MCE may fault in some environments (that also force #MC - * support). Any machine check that occurs before #MC support is fully - * configured will crash the system regardless of the CR4.MCE value set - * here. - */ - movl %cr4, %eax - andl $X86_CR4_MCE, %eax -#else - movl $0, %eax -#endif - - /* Enable PAE and LA57 (if required) paging modes */ - orl $X86_CR4_PAE, %eax - testl %edx, %edx - jz 1f - orl $X86_CR4_LA57, %eax 1: + /* Toggle CR4.LA57 */ + movl %cr4, %eax + btcl $X86_CR4_LA57_BIT, %eax movl %eax, %cr4 - /* Calculate address of paging_enabled() once we are executing in the trampoline */ - leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax - - /* Prepare the stack for far return to Long Mode */ - pushl $__KERNEL_CS - pushl %eax - /* Enable paging again. */ movl %cr0, %eax btsl $X86_CR0_PG_BIT, %eax movl %eax, %cr0 - lret + /* + * Return to the 64-bit calling code using LJMP rather than LRET, to + * avoid the need for a 32-bit addressable stack. The destination + * address will be adjusted after the template code is copied into a + * 32-bit addressable buffer. + */ +.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src) SYM_CODE_END(trampoline_32bit_src) - .code64 -SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled) - /* Return from the trampoline */ - retq -SYM_FUNC_END(.Lpaging_enabled) +/* + * This symbol is placed right after trampoline_32bit_src() so its address can + * be used to infer the size of the trampoline code. + */ +SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src) /* * The trampoline code has a size limit. @@ -721,7 +573,7 @@ SYM_FUNC_END(.Lpaging_enabled) */ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE - .code32 + .text SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 1: @@ -729,6 +581,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) jmp 1b SYM_FUNC_END(.Lno_longmode) + .globl verify_cpu #include "../../kernel/verify_cpu.S" .data @@ -760,249 +613,11 @@ SYM_DATA_START(boot_idt) .endr SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) -#ifdef CONFIG_AMD_MEM_ENCRYPT -SYM_DATA_START(boot32_idt_desc) - .word boot32_idt_end - boot32_idt - 1 - .long 0 -SYM_DATA_END(boot32_idt_desc) - .balign 8 -SYM_DATA_START(boot32_idt) - .rept 32 - .quad 0 - .endr -SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) -#endif - -#ifdef CONFIG_EFI_STUB -SYM_DATA(image_offset, .long 0) -#endif -#ifdef CONFIG_EFI_MIXED -SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) -SYM_DATA(efi_is64, .byte 1) - -#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) -#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) -#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) - - __HEAD - .code32 -SYM_FUNC_START(efi32_pe_entry) -/* - * efi_status_t efi32_pe_entry(efi_handle_t image_handle, - * efi_system_table_32_t *sys_table) - */ - - pushl %ebp - movl %esp, %ebp - pushl %eax // dummy push to allocate loaded_image - - pushl %ebx // save callee-save registers - pushl %edi - - call verify_cpu // check for long mode support - testl %eax, %eax - movl $0x80000003, %eax // EFI_UNSUPPORTED - jnz 2f - - call 1f -1: pop %ebx - subl $ rva(1b), %ebx - - /* Get the loaded image protocol pointer from the image handle */ - leal -4(%ebp), %eax - pushl %eax // &loaded_image - leal rva(loaded_image_proto)(%ebx), %eax - pushl %eax // pass the GUID address - pushl 8(%ebp) // pass the image handle - - /* - * Note the alignment of the stack frame. - * sys_table - * handle <-- 16-byte aligned on entry by ABI - * return address - * frame pointer - * loaded_image <-- local variable - * saved %ebx <-- 16-byte aligned here - * saved %edi - * &loaded_image - * &loaded_image_proto - * handle <-- 16-byte aligned for call to handle_protocol - */ - - movl 12(%ebp), %eax // sys_table - movl ST32_boottime(%eax), %eax // sys_table->boottime - call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol - addl $12, %esp // restore argument space - testl %eax, %eax - jnz 2f - - movl 8(%ebp), %ecx // image_handle - movl 12(%ebp), %edx // sys_table - movl -4(%ebp), %esi // loaded_image - movl LI32_image_base(%esi), %esi // loaded_image->image_base - movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry - /* - * We need to set the image_offset variable here since startup_32() will - * use it before we get to the 64-bit efi_pe_entry() in C code. - */ - subl %esi, %ebx - movl %ebx, rva(image_offset)(%ebp) // save image_offset - jmp efi32_pe_stub_entry - -2: popl %edi // restore callee-save registers - popl %ebx - leave - RET -SYM_FUNC_END(efi32_pe_entry) - - .section ".rodata" - /* EFI loaded image protocol GUID */ - .balign 4 -SYM_DATA_START_LOCAL(loaded_image_proto) - .long 0x5b1b31a1 - .word 0x9562, 0x11d2 - .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b -SYM_DATA_END(loaded_image_proto) -#endif - -#ifdef CONFIG_AMD_MEM_ENCRYPT - __HEAD - .code32 -/* - * Write an IDT entry into boot32_idt - * - * Parameters: - * - * %eax: Handler address - * %edx: Vector number - * - * Physical offset is expected in %ebp - */ -SYM_FUNC_START(startup32_set_idt_entry) - push %ebx - push %ecx - - /* IDT entry address to %ebx */ - leal rva(boot32_idt)(%ebp), %ebx - shl $3, %edx - addl %edx, %ebx - - /* Build IDT entry, lower 4 bytes */ - movl %eax, %edx - andl $0x0000ffff, %edx # Target code segment offset [15:0] - movl $__KERNEL32_CS, %ecx # Target code segment selector - shl $16, %ecx - orl %ecx, %edx - - /* Store lower 4 bytes to IDT */ - movl %edx, (%ebx) - - /* Build IDT entry, upper 4 bytes */ - movl %eax, %edx - andl $0xffff0000, %edx # Target code segment offset [31:16] - orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate - - /* Store upper 4 bytes to IDT */ - movl %edx, 4(%ebx) - - pop %ecx - pop %ebx - RET -SYM_FUNC_END(startup32_set_idt_entry) -#endif - -SYM_FUNC_START(startup32_load_idt) -#ifdef CONFIG_AMD_MEM_ENCRYPT - /* #VC handler */ - leal rva(startup32_vc_handler)(%ebp), %eax - movl $X86_TRAP_VC, %edx - call startup32_set_idt_entry - - /* Load IDT */ - leal rva(boot32_idt)(%ebp), %eax - movl %eax, rva(boot32_idt_desc+2)(%ebp) - lidt rva(boot32_idt_desc)(%ebp) -#endif - RET -SYM_FUNC_END(startup32_load_idt) - -/* - * Check for the correct C-bit position when the startup_32 boot-path is used. - * - * The check makes use of the fact that all memory is encrypted when paging is - * disabled. The function creates 64 bits of random data using the RDRAND - * instruction. RDRAND is mandatory for SEV guests, so always available. If the - * hypervisor violates that the kernel will crash right here. - * - * The 64 bits of random data are stored to a memory location and at the same - * time kept in the %eax and %ebx registers. Since encryption is always active - * when paging is off the random data will be stored encrypted in main memory. - * - * Then paging is enabled. When the C-bit position is correct all memory is - * still mapped encrypted and comparing the register values with memory will - * succeed. An incorrect C-bit position will map all memory unencrypted, so that - * the compare will use the encrypted random data and fail. - */ -SYM_FUNC_START(startup32_check_sev_cbit) -#ifdef CONFIG_AMD_MEM_ENCRYPT - pushl %eax - pushl %ebx - pushl %ecx - pushl %edx - - /* Check for non-zero sev_status */ - movl rva(sev_status)(%ebp), %eax - testl %eax, %eax - jz 4f - - /* - * Get two 32-bit random values - Don't bail out if RDRAND fails - * because it is better to prevent forward progress if no random value - * can be gathered. - */ -1: rdrand %eax - jnc 1b -2: rdrand %ebx - jnc 2b - - /* Store to memory and keep it in the registers */ - movl %eax, rva(sev_check_data)(%ebp) - movl %ebx, rva(sev_check_data+4)(%ebp) - - /* Enable paging to see if encryption is active */ - movl %cr0, %edx /* Backup %cr0 in %edx */ - movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */ - movl %ecx, %cr0 - - cmpl %eax, rva(sev_check_data)(%ebp) - jne 3f - cmpl %ebx, rva(sev_check_data+4)(%ebp) - jne 3f - - movl %edx, %cr0 /* Restore previous %cr0 */ - - jmp 4f - -3: /* Check failed - hlt the machine */ - hlt - jmp 3b - -4: - popl %edx - popl %ecx - popl %ebx - popl %eax -#endif - RET -SYM_FUNC_END(startup32_check_sev_cbit) - /* * Stack and heap for uncompression */ .bss .balign 4 -SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0) - SYM_DATA_START_LOCAL(boot_stack) .fill BOOT_STACK_SIZE, 1, 0 .balign 16 diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index b4155273df89..b8c42339bc35 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -167,8 +167,9 @@ void initialize_identity_maps(void *rmode) * or does not touch all the pages covering them. */ kernel_add_identity_map((unsigned long)_head, (unsigned long)_end); - boot_params = rmode; - kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1)); + boot_params_ptr = rmode; + kernel_add_identity_map((unsigned long)boot_params_ptr, + (unsigned long)(boot_params_ptr + 1)); cmdline = get_cmd_line_ptr(); kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); @@ -176,7 +177,7 @@ void initialize_identity_maps(void *rmode) * Also map the setup_data entries passed via boot_params in case they * need to be accessed by uncompressed kernel via the identity mapping. */ - sd = (struct setup_data *)boot_params->hdr.setup_data; + sd = (struct setup_data *)boot_params_ptr->hdr.setup_data; while (sd) { unsigned long sd_addr = (unsigned long)sd; @@ -393,3 +394,8 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code) */ kernel_add_identity_map(address, end); } + +void do_boot_nmi_trap(struct pt_regs *regs, unsigned long error_code) +{ + /* Empty handler to ignore NMI during early boot */ +} diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c index 3cdf94b41456..d100284bbef4 100644 --- a/arch/x86/boot/compressed/idt_64.c +++ b/arch/x86/boot/compressed/idt_64.c @@ -61,6 +61,7 @@ void load_stage2_idt(void) boot_idt_desc.address = (unsigned long)boot_idt; set_idt_entry(X86_TRAP_PF, boot_page_fault); + set_idt_entry(X86_TRAP_NMI, boot_nmi_trap); #ifdef CONFIG_AMD_MEM_ENCRYPT /* diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S index 22890e199f5b..4d03c8562f63 100644 --- a/arch/x86/boot/compressed/idt_handlers_64.S +++ b/arch/x86/boot/compressed/idt_handlers_64.S @@ -70,6 +70,7 @@ SYM_FUNC_END(\name) .code64 EXCEPTION_HANDLER boot_page_fault do_boot_page_fault error_code=1 +EXCEPTION_HANDLER boot_nmi_trap do_boot_nmi_trap error_code=0 #ifdef CONFIG_AMD_MEM_ENCRYPT EXCEPTION_HANDLER boot_stage1_vc do_vc_no_ghcb error_code=1 diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index e476bcbd9b42..9794d9174795 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -63,7 +63,7 @@ static unsigned long get_boot_seed(void) unsigned long hash = 0; hash = rotate_xor(hash, build_str, sizeof(build_str)); - hash = rotate_xor(hash, boot_params, sizeof(*boot_params)); + hash = rotate_xor(hash, boot_params_ptr, sizeof(*boot_params_ptr)); return hash; } @@ -383,7 +383,7 @@ static void handle_mem_options(void) static void mem_avoid_init(unsigned long input, unsigned long input_size, unsigned long output) { - unsigned long init_size = boot_params->hdr.init_size; + unsigned long init_size = boot_params_ptr->hdr.init_size; u64 initrd_start, initrd_size; unsigned long cmd_line, cmd_line_size; @@ -395,10 +395,10 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; /* Avoid initrd. */ - initrd_start = (u64)boot_params->ext_ramdisk_image << 32; - initrd_start |= boot_params->hdr.ramdisk_image; - initrd_size = (u64)boot_params->ext_ramdisk_size << 32; - initrd_size |= boot_params->hdr.ramdisk_size; + initrd_start = (u64)boot_params_ptr->ext_ramdisk_image << 32; + initrd_start |= boot_params_ptr->hdr.ramdisk_image; + initrd_size = (u64)boot_params_ptr->ext_ramdisk_size << 32; + initrd_size |= boot_params_ptr->hdr.ramdisk_size; mem_avoid[MEM_AVOID_INITRD].start = initrd_start; mem_avoid[MEM_AVOID_INITRD].size = initrd_size; /* No need to set mapping for initrd, it will be handled in VO. */ @@ -413,8 +413,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, } /* Avoid boot parameters. */ - mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; - mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); + mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params_ptr; + mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params_ptr); /* We don't need to set a mapping for setup_data. */ @@ -447,7 +447,7 @@ static bool mem_avoid_overlap(struct mem_vector *img, } /* Avoid all entries in the setup_data linked list. */ - ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data; + ptr = (struct setup_data *)(unsigned long)boot_params_ptr->hdr.setup_data; while (ptr) { struct mem_vector avoid; @@ -679,7 +679,7 @@ static bool process_mem_region(struct mem_vector *region, static bool process_efi_entries(unsigned long minimum, unsigned long image_size) { - struct efi_info *e = &boot_params->efi_info; + struct efi_info *e = &boot_params_ptr->efi_info; bool efi_mirror_found = false; struct mem_vector region; efi_memory_desc_t *md; @@ -761,8 +761,8 @@ static void process_e820_entries(unsigned long minimum, struct boot_e820_entry *entry; /* Verify potential e820 positions, appending to slots list. */ - for (i = 0; i < boot_params->e820_entries; i++) { - entry = &boot_params->e820_table[i]; + for (i = 0; i < boot_params_ptr->e820_entries; i++) { + entry = &boot_params_ptr->e820_table[i]; /* Skip non-RAM entries. */ if (entry->type != E820_TYPE_RAM) continue; @@ -836,7 +836,7 @@ void choose_random_location(unsigned long input, return; } - boot_params->hdr.loadflags |= KASLR_FLAG; + boot_params_ptr->hdr.loadflags |= KASLR_FLAG; if (IS_ENABLED(CONFIG_X86_32)) mem_limit = KERNEL_IMAGE_SIZE; diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index a73e4d783cae..32f7cc8a8625 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -12,16 +12,13 @@ #include #include #include +#include +#include .text .code32 SYM_FUNC_START(get_sev_encryption_bit) - xor %eax, %eax - -#ifdef CONFIG_AMD_MEM_ENCRYPT push %ebx - push %ecx - push %edx movl $0x80000000, %eax /* CPUID to check the highest leaf */ cpuid @@ -52,12 +49,7 @@ SYM_FUNC_START(get_sev_encryption_bit) xor %eax, %eax .Lsev_exit: - pop %edx - pop %ecx pop %ebx - -#endif /* CONFIG_AMD_MEM_ENCRYPT */ - RET SYM_FUNC_END(get_sev_encryption_bit) @@ -98,7 +90,7 @@ SYM_CODE_START_LOCAL(sev_es_req_cpuid) jmp 1b SYM_CODE_END(sev_es_req_cpuid) -SYM_CODE_START(startup32_vc_handler) +SYM_CODE_START_LOCAL(startup32_vc_handler) pushl %eax pushl %ebx pushl %ecx @@ -184,15 +176,149 @@ SYM_CODE_START(startup32_vc_handler) jmp .Lfail SYM_CODE_END(startup32_vc_handler) +/* + * Write an IDT entry into boot32_idt + * + * Parameters: + * + * %eax: Handler address + * %edx: Vector number + * %ecx: IDT address + */ +SYM_FUNC_START_LOCAL(startup32_set_idt_entry) + /* IDT entry address to %ecx */ + leal (%ecx, %edx, 8), %ecx + + /* Build IDT entry, lower 4 bytes */ + movl %eax, %edx + andl $0x0000ffff, %edx # Target code segment offset [15:0] + orl $(__KERNEL32_CS << 16), %edx # Target code segment selector + + /* Store lower 4 bytes to IDT */ + movl %edx, (%ecx) + + /* Build IDT entry, upper 4 bytes */ + movl %eax, %edx + andl $0xffff0000, %edx # Target code segment offset [31:16] + orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate + + /* Store upper 4 bytes to IDT */ + movl %edx, 4(%ecx) + + RET +SYM_FUNC_END(startup32_set_idt_entry) + +SYM_FUNC_START(startup32_load_idt) + push %ebp + push %ebx + + call 1f +1: pop %ebp + + leal (boot32_idt - 1b)(%ebp), %ebx + + /* #VC handler */ + leal (startup32_vc_handler - 1b)(%ebp), %eax + movl $X86_TRAP_VC, %edx + movl %ebx, %ecx + call startup32_set_idt_entry + + /* Load IDT */ + leal (boot32_idt_desc - 1b)(%ebp), %ecx + movl %ebx, 2(%ecx) + lidt (%ecx) + + pop %ebx + pop %ebp + RET +SYM_FUNC_END(startup32_load_idt) + +/* + * Check for the correct C-bit position when the startup_32 boot-path is used. + * + * The check makes use of the fact that all memory is encrypted when paging is + * disabled. The function creates 64 bits of random data using the RDRAND + * instruction. RDRAND is mandatory for SEV guests, so always available. If the + * hypervisor violates that the kernel will crash right here. + * + * The 64 bits of random data are stored to a memory location and at the same + * time kept in the %eax and %ebx registers. Since encryption is always active + * when paging is off the random data will be stored encrypted in main memory. + * + * Then paging is enabled. When the C-bit position is correct all memory is + * still mapped encrypted and comparing the register values with memory will + * succeed. An incorrect C-bit position will map all memory unencrypted, so that + * the compare will use the encrypted random data and fail. + */ +SYM_FUNC_START(startup32_check_sev_cbit) + pushl %ebx + pushl %ebp + + call 0f +0: popl %ebp + + /* Check for non-zero sev_status */ + movl (sev_status - 0b)(%ebp), %eax + testl %eax, %eax + jz 4f + + /* + * Get two 32-bit random values - Don't bail out if RDRAND fails + * because it is better to prevent forward progress if no random value + * can be gathered. + */ +1: rdrand %eax + jnc 1b +2: rdrand %ebx + jnc 2b + + /* Store to memory and keep it in the registers */ + leal (sev_check_data - 0b)(%ebp), %ebp + movl %eax, 0(%ebp) + movl %ebx, 4(%ebp) + + /* Enable paging to see if encryption is active */ + movl %cr0, %edx /* Backup %cr0 in %edx */ + movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */ + movl %ecx, %cr0 + + cmpl %eax, 0(%ebp) + jne 3f + cmpl %ebx, 4(%ebp) + jne 3f + + movl %edx, %cr0 /* Restore previous %cr0 */ + + jmp 4f + +3: /* Check failed - hlt the machine */ + hlt + jmp 3b + +4: + popl %ebp + popl %ebx + RET +SYM_FUNC_END(startup32_check_sev_cbit) + .code64 #include "../../kernel/sev_verify_cbit.S" .data -#ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 SYM_DATA(sme_me_mask, .quad 0) SYM_DATA(sev_status, .quad 0) SYM_DATA(sev_check_data, .quad 0) -#endif + +SYM_DATA_START_LOCAL(boot32_idt) + .rept 32 + .quad 0 + .endr +SYM_DATA_END(boot32_idt) + +SYM_DATA_START_LOCAL(boot32_idt_desc) + .word . - boot32_idt - 1 + .long 0 +SYM_DATA_END(boot32_idt_desc) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index cf690d8712f4..8ae7893d712f 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -46,7 +46,7 @@ void *memmove(void *dest, const void *src, size_t n); /* * This is set up by the setup-routine at boot-time */ -struct boot_params *boot_params; +struct boot_params *boot_params_ptr; struct port_io_ops pio_ops; @@ -132,8 +132,8 @@ void __putstr(const char *s) if (lines == 0 || cols == 0) return; - x = boot_params->screen_info.orig_x; - y = boot_params->screen_info.orig_y; + x = boot_params_ptr->screen_info.orig_x; + y = boot_params_ptr->screen_info.orig_y; while ((c = *s++) != '\0') { if (c == '\n') { @@ -154,8 +154,8 @@ void __putstr(const char *s) } } - boot_params->screen_info.orig_x = x; - boot_params->screen_info.orig_y = y; + boot_params_ptr->screen_info.orig_x = x; + boot_params_ptr->screen_info.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ outb(14, vidport); @@ -277,7 +277,7 @@ static inline void handle_relocations(void *output, unsigned long output_len, { } #endif -static void parse_elf(void *output) +static size_t parse_elf(void *output) { #ifdef CONFIG_X86_64 Elf64_Ehdr ehdr; @@ -293,10 +293,8 @@ static void parse_elf(void *output) if (ehdr.e_ident[EI_MAG0] != ELFMAG0 || ehdr.e_ident[EI_MAG1] != ELFMAG1 || ehdr.e_ident[EI_MAG2] != ELFMAG2 || - ehdr.e_ident[EI_MAG3] != ELFMAG3) { + ehdr.e_ident[EI_MAG3] != ELFMAG3) error("Kernel is not a valid ELF file"); - return; - } debug_putstr("Parsing ELF... "); @@ -328,6 +326,35 @@ static void parse_elf(void *output) } free(phdrs); + + return ehdr.e_entry - LOAD_PHYSICAL_ADDR; +} + +const unsigned long kernel_total_size = VO__end - VO__text; + +static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); + +extern unsigned char input_data[]; +extern unsigned int input_len, output_len; + +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)) +{ + unsigned long entry; + + if (!free_mem_ptr) { + free_mem_ptr = (unsigned long)boot_heap; + free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap); + } + + if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len, + NULL, error) < 0) + return ULONG_MAX; + + entry = parse_elf(outbuf); + handle_relocations(outbuf, output_len, virt_addr); + + return entry; } /* @@ -347,25 +374,22 @@ static void parse_elf(void *output) * |-------uncompressed kernel image---------| * */ -asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, - unsigned char *input_data, - unsigned long input_len, - unsigned char *output, - unsigned long output_len) +asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) { - const unsigned long kernel_total_size = VO__end - VO__text; unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + memptr heap = (memptr)boot_heap; unsigned long needed_size; + size_t entry_offset; /* Retain x86 boot parameters pointer passed from startup_32/64. */ - boot_params = rmode; + boot_params_ptr = rmode; /* Clear flags intended for solely in-kernel use. */ - boot_params->hdr.loadflags &= ~KASLR_FLAG; + boot_params_ptr->hdr.loadflags &= ~KASLR_FLAG; - sanitize_boot_params(boot_params); + sanitize_boot_params(boot_params_ptr); - if (boot_params->screen_info.orig_video_mode == 7) { + if (boot_params_ptr->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; } else { @@ -373,8 +397,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, vidport = 0x3d4; } - lines = boot_params->screen_info.orig_video_lines; - cols = boot_params->screen_info.orig_video_cols; + lines = boot_params_ptr->screen_info.orig_video_lines; + cols = boot_params_ptr->screen_info.orig_video_cols; init_default_io_ops(); @@ -393,7 +417,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * so that early debugging output from the RSDP parsing code can be * collected. */ - boot_params->acpi_rsdp_addr = get_rsdp_addr(); + boot_params_ptr->acpi_rsdp_addr = get_rsdp_addr(); debug_putstr("early console in extract_kernel\n"); @@ -411,7 +435,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * entries. This ensures the full mapped area is usable RAM * and doesn't include any reserved areas. */ - needed_size = max(output_len, kernel_total_size); + needed_size = max_t(unsigned long, output_len, kernel_total_size); #ifdef CONFIG_X86_64 needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN); #endif @@ -442,7 +466,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #ifdef CONFIG_X86_64 if (heap > 0x3fffffffffffUL) error("Destination address too large"); - if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE) + if (virt_addr + needed_size > KERNEL_IMAGE_SIZE) error("Destination virtual address is beyond the kernel mapping area"); #else if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) @@ -454,16 +478,17 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #endif debug_putstr("\nDecompressing Linux... "); - __decompress(input_data, input_len, NULL, NULL, output, output_len, - NULL, error); - parse_elf(output); - handle_relocations(output, output_len, virt_addr); - debug_putstr("done.\nBooting the kernel.\n"); + + entry_offset = decompress_kernel(output, virt_addr, error); + + debug_putstr("done.\nBooting the kernel (entry_offset: 0x"); + debug_puthex(entry_offset); + debug_putstr(").\n"); /* Disable exception handling before booting the kernel */ cleanup_exception_handling(); - return output; + return output + entry_offset; } void fortify_panic(const char *name) diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 20118fb7c53b..254acd76efde 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -52,7 +52,6 @@ extern memptr free_mem_ptr; extern memptr free_mem_end_ptr; void *malloc(int size); void free(void *where); -extern struct boot_params *boot_params; void __putstr(const char *s); void __puthex(unsigned long value); #define error_putstr(__x) __putstr(__x) @@ -170,9 +169,7 @@ static inline int count_immovable_mem_regions(void) { return 0; } #endif /* ident_map_64.c */ -#ifdef CONFIG_X86_5LEVEL extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d; -#endif extern void kernel_add_identity_map(unsigned long start, unsigned long end); /* Used by PAGE_KERN* macros: */ @@ -190,6 +187,7 @@ static inline void cleanup_exception_handling(void) { } /* IDT Entry Points */ void boot_page_fault(void); +void boot_nmi_trap(void); void boot_stage1_vc(void); void boot_stage2_vc(void); diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index cc9b2529a086..6d595abe06b3 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -3,18 +3,16 @@ #define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE) -#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 - #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE -#define TRAMPOLINE_32BIT_CODE_SIZE 0x80 - -#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE +#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 #ifndef __ASSEMBLER__ extern unsigned long *trampoline_32bit; -extern void trampoline_32bit_src(void *return_ptr); +extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl); + +extern const u16 trampoline_ljmp_imm_offset; #endif /* __ASSEMBLER__ */ #endif /* BOOT_COMPRESSED_PAGETABLE_H */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 2ac12ff4111b..51f957b24ba7 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -16,11 +16,6 @@ unsigned int __section(".data") pgdir_shift = 39; unsigned int __section(".data") ptrs_per_p4d = 1; #endif -struct paging_config { - unsigned long trampoline_start; - unsigned long l5_required; -}; - /* Buffer to preserve trampoline memory */ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; @@ -29,11 +24,10 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; * purposes. * * Avoid putting the pointer into .bss as it will be cleared between - * paging_prepare() and extract_kernel(). + * configure_5level_paging() and extract_kernel(). */ unsigned long *trampoline_32bit __section(".data"); -extern struct boot_params *boot_params; int cmdline_find_option_bool(const char *option); static unsigned long find_trampoline_placement(void) @@ -54,7 +48,7 @@ static unsigned long find_trampoline_placement(void) * * Only look for values in the legacy ROM for non-EFI system. */ - signature = (char *)&boot_params->efi_info.efi_loader_signature; + signature = (char *)&boot_params_ptr->efi_info.efi_loader_signature; if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) && strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) { ebda_start = *(unsigned short *)0x40e << 4; @@ -70,10 +64,10 @@ static unsigned long find_trampoline_placement(void) bios_start = round_down(bios_start, PAGE_SIZE); /* Find the first usable memory region under bios_start. */ - for (i = boot_params->e820_entries - 1; i >= 0; i--) { + for (i = boot_params_ptr->e820_entries - 1; i >= 0; i--) { unsigned long new = bios_start; - entry = &boot_params->e820_table[i]; + entry = &boot_params_ptr->e820_table[i]; /* Skip all entries above bios_start. */ if (bios_start <= entry->addr) @@ -106,12 +100,13 @@ static unsigned long find_trampoline_placement(void) return bios_start - TRAMPOLINE_32BIT_SIZE; } -struct paging_config paging_prepare(void *rmode) +asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) { - struct paging_config paging_config = {}; + void (*toggle_la57)(void *cr3); + bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ - boot_params = rmode; + boot_params_ptr = bp; /* * Check if LA57 is desired and supported. @@ -129,12 +124,22 @@ struct paging_config paging_prepare(void *rmode) !cmdline_find_option_bool("no5lvl") && native_cpuid_eax(0) >= 7 && (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { - paging_config.l5_required = 1; + l5_required = true; + + /* Initialize variables for 5-level paging */ + __pgtable_l5_enabled = 1; + pgdir_shift = 48; + ptrs_per_p4d = 512; } - paging_config.trampoline_start = find_trampoline_placement(); + /* + * The trampoline will not be used if the paging mode is already set to + * the desired one. + */ + if (l5_required == !!(native_read_cr4() & X86_CR4_LA57)) + return; - trampoline_32bit = (unsigned long *)paging_config.trampoline_start; + trampoline_32bit = (unsigned long *)find_trampoline_placement(); /* Preserve trampoline memory */ memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE); @@ -143,32 +148,32 @@ struct paging_config paging_prepare(void *rmode) memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); /* Copy trampoline code in place */ - memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), + toggle_la57 = memcpy(trampoline_32bit + + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); + /* + * Avoid the need for a stack in the 32-bit trampoline code, by using + * LJMP rather than LRET to return back to long mode. LJMP takes an + * immediate absolute address, which needs to be adjusted based on the + * placement of the trampoline. + */ + *(u32 *)((u8 *)toggle_la57 + trampoline_ljmp_imm_offset) += + (unsigned long)toggle_la57; + /* * The code below prepares page table in trampoline memory. * * The new page table will be used by trampoline code for switching * from 4- to 5-level paging or vice versa. - * - * If switching is not required, the page table is unused: trampoline - * code wouldn't touch CR3. */ - /* - * We are not going to use the page table in trampoline memory if we - * are already in the desired paging mode. - */ - if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57)) - goto out; - - if (paging_config.l5_required) { + if (l5_required) { /* * For 4- to 5-level paging transition, set up current CR3 as * the first and the only entry in a new top-level page table. */ - trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC; + *trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC; } else { unsigned long src; @@ -181,38 +186,17 @@ struct paging_config paging_prepare(void *rmode) * may be above 4G. */ src = *(unsigned long *)__native_read_cr3() & PAGE_MASK; - memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long), - (void *)src, PAGE_SIZE); + memcpy(trampoline_32bit, (void *)src, PAGE_SIZE); } -out: - return paging_config; -} - -void cleanup_trampoline(void *pgtable) -{ - void *trampoline_pgtable; - - trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long); + toggle_la57(trampoline_32bit); /* - * Move the top level page table out of trampoline memory, - * if it's there. + * Move the top level page table out of trampoline memory. */ - if ((void *)__native_read_cr3() == trampoline_pgtable) { - memcpy(pgtable, trampoline_pgtable, PAGE_SIZE); - native_write_cr3((unsigned long)pgtable); - } + memcpy(pgtable, trampoline_32bit, PAGE_SIZE); + native_write_cr3((unsigned long)pgtable); /* Restore trampoline memory */ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE); - - /* Initialize variables for 5-level paging */ -#ifdef CONFIG_X86_5LEVEL - if (__read_cr4() & X86_CR4_LA57) { - __pgtable_l5_enabled = 1; - pgdir_shift = 48; - ptrs_per_p4d = 512; - } -#endif } diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 9c91cc40f456..d07e665bb265 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -327,20 +327,25 @@ static void enforce_vmpl0(void) */ #define SNP_FEATURES_PRESENT (0) +u64 snp_get_unsupported_features(u64 status) +{ + if (!(status & MSR_AMD64_SEV_SNP_ENABLED)) + return 0; + + return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; +} + void snp_check_features(void) { u64 unsupported; - if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) - return; - /* * Terminate the boot if hypervisor has enabled any feature lacking * guest side implementation. Pass on the unsupported features mask through * EXIT_INFO_2 of the GHCB protocol so that those features can be reported * as part of the guest boot failure. */ - unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; + unsupported = snp_get_unsupported_features(sev_status); if (unsupported) { if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb())) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); @@ -350,10 +355,45 @@ void snp_check_features(void) } } -void sev_enable(struct boot_params *bp) +/* + * sev_check_cpu_support - Check for SEV support in the CPU capabilities + * + * Returns < 0 if SEV is not supported, otherwise the position of the + * encryption bit in the page table descriptors. + */ +static int sev_check_cpu_support(void) { unsigned int eax, ebx, ecx, edx; + + /* Check for the SME/SEV support leaf */ + eax = 0x80000000; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if (eax < 0x8000001f) + return -ENODEV; + + /* + * Check for the SME/SEV feature: + * CPUID Fn8000_001F[EAX] + * - Bit 0 - Secure Memory Encryption support + * - Bit 1 - Secure Encrypted Virtualization support + * CPUID Fn8000_001F[EBX] + * - Bits 5:0 - Pagetable bit position used to indicate encryption + */ + eax = 0x8000001f; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + /* Check whether SEV is supported */ + if (!(eax & BIT(1))) + return -ENODEV; + + return ebx & 0x3f; +} + +void sev_enable(struct boot_params *bp) +{ struct msr m; + int bitpos; bool snp; /* @@ -373,26 +413,7 @@ void sev_enable(struct boot_params *bp) * which is good enough. */ - /* Check for the SME/SEV support leaf */ - eax = 0x80000000; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - if (eax < 0x8000001f) - return; - - /* - * Check for the SME/SEV feature: - * CPUID Fn8000_001F[EAX] - * - Bit 0 - Secure Memory Encryption support - * - Bit 1 - Secure Encrypted Virtualization support - * CPUID Fn8000_001F[EBX] - * - Bits 5:0 - Pagetable bit position used to indicate encryption - */ - eax = 0x8000001f; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - /* Check whether SEV is supported */ - if (!(eax & BIT(1))) + if (sev_check_cpu_support() < 0) return; /* @@ -403,26 +424,8 @@ void sev_enable(struct boot_params *bp) /* Now repeat the checks with the SNP CPUID table. */ - /* Recheck the SME/SEV support leaf */ - eax = 0x80000000; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - if (eax < 0x8000001f) - return; - - /* - * Recheck for the SME/SEV feature: - * CPUID Fn8000_001F[EAX] - * - Bit 0 - Secure Memory Encryption support - * - Bit 1 - Secure Encrypted Virtualization support - * CPUID Fn8000_001F[EBX] - * - Bits 5:0 - Pagetable bit position used to indicate encryption - */ - eax = 0x8000001f; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - /* Check whether SEV is supported */ - if (!(eax & BIT(1))) { + bitpos = sev_check_cpu_support(); + if (bitpos < 0) { if (snp) error("SEV-SNP support indicated by CC blob, but not CPUID."); return; @@ -454,7 +457,24 @@ void sev_enable(struct boot_params *bp) if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) error("SEV-SNP supported indicated by CC blob, but not SEV status MSR."); - sme_me_mask = BIT_ULL(ebx & 0x3f); + sme_me_mask = BIT_ULL(bitpos); +} + +/* + * sev_get_status - Retrieve the SEV status mask + * + * Returns 0 if the CPU is not SEV capable, otherwise the value of the + * AMD64_SEV MSR. + */ +u64 sev_get_status(void) +{ + struct msr m; + + if (sev_check_cpu_support() < 0) + return 0; + + boot_rdmsr(MSR_AMD64_SEV, &m); + return m.q; } /* Search for Confidential Computing blob in the EFI config table. */ @@ -545,7 +565,7 @@ void sev_prep_identity_maps(unsigned long top_level_pgt) * accessed after switchover. */ if (sev_snp_enabled()) { - unsigned long cc_info_pa = boot_params->cc_blob_address; + unsigned long cc_info_pa = boot_params_ptr->cc_blob_address; struct cc_blob_sev_info *cc_info; kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info)); diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index f912d7770130..d31982509654 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -406,7 +406,7 @@ xloadflags: # define XLF1 0 #endif -#ifdef CONFIG_EFI_STUB +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL # ifdef CONFIG_EFI_MIXED # define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64) # else diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index a3725ad46c5a..bd247692b701 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -290,6 +290,7 @@ static void efi_stub_entry_update(void) { unsigned long addr = efi32_stub_entry; +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL #ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */ addr = efi64_stub_entry - 0x200; @@ -298,6 +299,7 @@ static void efi_stub_entry_update(void) #ifdef CONFIG_EFI_MIXED if (efi32_stub_entry != addr) die("32-bit and 64-bit EFI entry points do not match\n"); +#endif #endif put_unaligned_le32(addr, &buf[0x264]); } diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 49b44f881484..1d3ad275c366 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -13,8 +13,8 @@ #include #include -static enum cc_vendor vendor __ro_after_init; -static u64 cc_mask __ro_after_init; +enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; +u64 cc_mask __ro_after_init; static bool intel_cc_platform_has(enum cc_attr attr) { @@ -83,7 +83,7 @@ static bool hyperv_cc_platform_has(enum cc_attr attr) bool cc_platform_has(enum cc_attr attr) { - switch (vendor) { + switch (cc_vendor) { case CC_VENDOR_AMD: return amd_cc_platform_has(attr); case CC_VENDOR_INTEL: @@ -105,7 +105,7 @@ u64 cc_mkenc(u64 val) * - for AMD, bit *set* means the page is encrypted * - for Intel *clear* means encrypted. */ - switch (vendor) { + switch (cc_vendor) { case CC_VENDOR_AMD: return val | cc_mask; case CC_VENDOR_INTEL: @@ -118,7 +118,7 @@ u64 cc_mkenc(u64 val) u64 cc_mkdec(u64 val) { /* See comment in cc_mkenc() */ - switch (vendor) { + switch (cc_vendor) { case CC_VENDOR_AMD: return val & ~cc_mask; case CC_VENDOR_INTEL: @@ -128,13 +128,3 @@ u64 cc_mkdec(u64 val) } } EXPORT_SYMBOL_GPL(cc_mkdec); - -__init void cc_set_vendor(enum cc_vendor v) -{ - vendor = v; -} - -__init void cc_set_mask(u64 mask) -{ - cc_mask = mask; -} diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index d0565a9e7d8c..4692450aeb4d 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -793,7 +793,7 @@ void __init tdx_early_init(void) setup_force_cpu_cap(X86_FEATURE_TDX_GUEST); - cc_set_vendor(CC_VENDOR_INTEL); + cc_vendor = CC_VENDOR_INTEL; tdx_parse_tdinfo(&cc_mask); cc_set_mask(cc_mask); diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index bfb7bcb362bc..09e99d13fc0b 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include .pushsection .noinstr.text, "ax" @@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb) EXPORT_SYMBOL_GPL(entry_ibpb); .popsection + +/* + * Define the VERW operand that is disguised as entry code so that + * it can be referenced with KPTI enabled. This ensure VERW can be + * used late in exit-to-user path after page tables are switched. + */ +.pushsection .entry.text, "ax" + +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_START_NOALIGN(mds_verw_sel) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + .word __KERNEL_DS +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_END(mds_verw_sel); +/* For KVM */ +EXPORT_SYMBOL_GPL(mds_verw_sel); + +.popsection + diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index e309e7156038..ee5def1060c8 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -912,6 +912,7 @@ SYM_FUNC_START(entry_SYSENTER_32) BUG_IF_WRONG_CR3 no_user_check=1 popfl popl %eax + CLEAR_CPU_BUFFERS /* * Return back to the vDSO, which will pop ecx and edx. @@ -981,6 +982,7 @@ restore_all_switch_stack: /* Restore user state */ RESTORE_REGS pop=4 # skip orig_eax/error_code + CLEAR_CPU_BUFFERS .Lirq_return: /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization @@ -1173,6 +1175,7 @@ SYM_CODE_START(asm_exc_nmi) /* Not on SYSENTER stack. */ call exc_nmi + CLEAR_CPU_BUFFERS jmp .Lnmi_return .Lnmi_from_sysenter_stack: diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9953d966d124..c2383c2880ec 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -223,6 +223,7 @@ syscall_return_via_sysret: SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR swapgs + CLEAR_CPU_BUFFERS sysretq SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR @@ -656,6 +657,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) /* Restore RDI. */ popq %rdi swapgs + CLEAR_CPU_BUFFERS jmp .Lnative_iret @@ -767,6 +769,8 @@ native_irq_return_ldt: */ popq %rax /* Restore user RAX */ + CLEAR_CPU_BUFFERS + /* * RSP now points to an ordinary IRET frame, except that the page * is read-only and RSP[31:16] are preloaded with the userspace @@ -1493,6 +1497,12 @@ nmi_restore: std movq $0, 5*8(%rsp) /* clear "NMI executing" */ + /* + * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like + * NMI in kernel after user state is restored. For an unprivileged user + * these conditions are hard to meet. + */ + /* * iretq reads the "iret" frame and exits the NMI stack in a * single instruction. We are returning to kernel mode, so this @@ -1511,6 +1521,7 @@ SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY ENDBR mov $-ENOSYS, %eax + CLEAR_CPU_BUFFERS sysretl SYM_CODE_END(ignore_sysret) #endif diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index d6c08d8986b1..4bcd009a232b 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -272,6 +272,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) xorl %r9d, %r9d xorl %r10d, %r10d swapgs + CLEAR_CPU_BUFFERS sysretl SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 04f4b96dec6d..fd091b9dd706 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -604,7 +604,6 @@ static void amd_pmu_cpu_dead(int cpu) kfree(cpuhw->lbr_sel); cpuhw->lbr_sel = NULL; - amd_pmu_cpu_reset(cpu); if (!x86_pmu.amd_nb_constraints) return; diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index fbcfec4dc4cc..ca8eed1d496a 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -113,6 +113,20 @@ #endif +#ifndef __ASSEMBLY__ +#ifndef __pic__ +static __always_inline __pure void *rip_rel_ptr(void *p) +{ + asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p)); + + return p; +} +#define RIP_REL_REF(var) (*(typeof(&(var)))rip_rel_ptr(&(var))) +#else +#define RIP_REL_REF(var) (var) +#endif +#endif + /* * Macros to generate condition code outputs from inline assembly, * The output operand must be type "bool". diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 215d37f7dde8..a38cc0afc90a 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -79,4 +79,14 @@ # define BOOT_STACK_SIZE 0x1000 #endif +#ifndef __ASSEMBLY__ +extern unsigned int output_len; +extern const unsigned long kernel_total_size; + +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)); + +extern struct boot_params *boot_params_ptr; +#endif + #endif /* _ASM_X86_BOOT_H */ diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 3d98c3a60d34..60bb26097da1 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_COCO_H #define _ASM_X86_COCO_H +#include #include enum cc_vendor { @@ -11,10 +12,15 @@ enum cc_vendor { CC_VENDOR_INTEL, }; -void cc_set_vendor(enum cc_vendor v); -void cc_set_mask(u64 mask); +extern enum cc_vendor cc_vendor; +extern u64 cc_mask; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM +static inline void cc_set_mask(u64 mask) +{ + RIP_REL_REF(cc_mask) = mask; +} + u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); #else diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ce0c8f7d3218..f835b328ba24 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -173,7 +173,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto( + asm goto( ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b122708792c4..9a157942ae3d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -304,7 +304,7 @@ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ - +#define X86_FEATURE_CLEAR_CPU_BUF (11*32+18) /* "" Clear CPU buffers using VERW */ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ @@ -427,6 +427,7 @@ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ #define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ @@ -477,4 +478,5 @@ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 233ae6986d6f..e601264b1a24 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -88,6 +88,8 @@ static inline void efi_fpu_end(void) } #ifdef CONFIG_X86_32 +#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1) + #define arch_efi_call_virt_setup() \ ({ \ efi_fpu_begin(); \ @@ -101,8 +103,7 @@ static inline void efi_fpu_end(void) }) #else /* !CONFIG_X86_32 */ - -#define EFI_LOADER_SIGNATURE "EL64" +#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT extern asmlinkage u64 __efi_call(void *fp, ...); @@ -214,6 +215,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, #ifdef CONFIG_EFI_MIXED +#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX) + #define ARCH_HAS_EFISTUB_WRAPPERS static inline bool efi_is_64bit(void) @@ -325,6 +328,13 @@ static inline u32 efi64_convert_status(efi_status_t status) #define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \ (__efi64_split(phys), __efi64_split(size), __efi64_split(flags)) +/* Memory Attribute Protocol */ +#define __efi64_argmap_set_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + +#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + /* * The macros below handle the plumbing for the argument mapping. To add a * mapping for a specific EFI method, simply define a macro diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 11203a9fe0a8..ffe72790ceaf 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { - mds_user_clear_cpu_buffers(); amd_clear_divider(); } #define arch_exit_to_user_mode arch_exit_to_user_mode diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index 800ffce0db29..6b4d36c95165 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -9,7 +9,6 @@ static inline bool arch_irq_work_has_interrupt(void) { return boot_cpu_has(X86_FEATURE_APIC); } -extern void arch_irq_work_raise(void); #else static inline bool arch_irq_work_has_interrupt(void) { diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 071572e23d3a..cbbef32517f0 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -24,7 +24,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:" + asm goto("1:" "jmp %l[l_yes] # objtool NOPs this \n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (2 | branch) : : l_yes); @@ -38,7 +38,7 @@ l_yes: static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" + asm goto("1:" ".byte " __stringify(BYTES_NOP5) "\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); @@ -52,7 +52,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" + asm goto("1:" "jmp %l[l_yes]\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h index 8fa6ac0e2d76..d91b37f5b4bb 100644 --- a/arch/x86/include/asm/kmsan.h +++ b/arch/x86/include/asm/kmsan.h @@ -64,6 +64,7 @@ static inline bool kmsan_virt_addr_valid(void *addr) { unsigned long x = (unsigned long)addr; unsigned long y = x - __START_KERNEL_map; + bool ret; /* use the carry flag to determine if x was < __START_KERNEL_map */ if (unlikely(x > y)) { @@ -79,7 +80,21 @@ static inline bool kmsan_virt_addr_valid(void *addr) return false; } - return pfn_valid(x >> PAGE_SHIFT); + /* + * pfn_valid() relies on RCU, and may call into the scheduler on exiting + * the critical section. However, this would result in recursion with + * KMSAN. Therefore, disable preemption here, and re-enable preemption + * below while suppressing reschedules to avoid recursion. + * + * Note, this sacrifices occasionally breaking scheduling guarantees. + * Although, a kernel compiled with KMSAN has already given up on any + * performance guarantees due to being heavily instrumented. + */ + preempt_disable(); + ret = pfn_valid(x >> PAGE_SHIFT); + preempt_enable_no_resched(); + + return ret; } #endif /* !MODULE */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c91326593e74..41d06822bc8c 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -15,7 +15,8 @@ #include #include -#include +#include +struct boot_params; #ifdef CONFIG_X86_MEM_ENCRYPT void __init mem_encrypt_init(void); @@ -57,6 +58,11 @@ void __init mem_encrypt_free_decrypted_mem(void); void __init sev_es_init_vc_handling(void); +static inline u64 sme_get_me_mask(void) +{ + return RIP_REL_REF(sme_me_mask); +} + #define __bss_decrypted __section(".bss..decrypted") #else /* !CONFIG_AMD_MEM_ENCRYPT */ @@ -88,6 +94,8 @@ early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool en static inline void mem_encrypt_free_decrypted_mem(void) { } +static inline u64 sme_get_me_mask(void) { return 0; } + #define __bss_decrypted #endif /* CONFIG_AMD_MEM_ENCRYPT */ @@ -105,11 +113,6 @@ void add_encrypt_protection_map(void); extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; -static inline u64 sme_get_me_mask(void) -{ - return sme_me_mask; -} - #endif /* __ASSEMBLY__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ec955ab2ff03..33a19ef23644 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -30,6 +30,7 @@ #define _EFER_SVME 12 /* Enable virtualization */ #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ +#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ #define EFER_SCE (1<<_EFER_SCE) #define EFER_LME (1<<_EFER_LME) @@ -38,6 +39,7 @@ #define EFER_SVME (1<<_EFER_SVME) #define EFER_LMSLE (1<<_EFER_LMSLE) #define EFER_FFXSR (1<<_EFER_FFXSR) +#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) /* Intel MSRs. Some also available on other CPUs */ @@ -168,6 +170,14 @@ * CPU is not vulnerable to Gather * Data Sampling (GDS). */ +#define ARCH_CAP_RFDS_NO BIT(27) /* + * Not susceptible to Register + * File Data Sampling. + */ +#define ARCH_CAP_RFDS_CLEAR BIT(28) /* + * VERW clears CPU Register + * File. + */ #define ARCH_CAP_XAPIC_DISABLE BIT(21) /* * IA32_XAPIC_DISABLE_STATUS MSR diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 2f123d4fb85b..8f6f17a8617b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -194,6 +194,19 @@ #endif .endm +/* + * Macro to execute VERW instruction that mitigate transient data sampling + * attacks such as MDS. On affected systems a microcode update overloaded VERW + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. + * + * Note: Only the memory operand variant of VERW clears the CPU buffers. + */ +.macro CLEAR_CPU_BUFFERS + ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF + verw _ASM_RIP(mds_verw_sel) +.Lskip_verw_\@: +.endm + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -222,6 +235,8 @@ extern void srso_alias_untrain_ret(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); +extern void (*x86_return_thunk)(void); + #ifdef CONFIG_RETPOLINE #define GEN(reg) \ @@ -366,13 +381,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_user_clear); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); +extern u16 mds_verw_sel; + #include /** @@ -398,17 +414,6 @@ static __always_inline void mds_clear_cpu_buffers(void) asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); } -/** - * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability - * - * Clear CPU buffers if the corresponding static key is enabled - */ -static __always_inline void mds_user_clear_cpu_buffers(void) -{ - if (static_branch_likely(&mds_user_clear)) - mds_clear_cpu_buffers(); -} - /** * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability * diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 7fa611216417..1919ccf493cd 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -18,7 +18,7 @@ #define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ ({ \ bool c = false; \ - asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ + asm goto (fullop "; j" #cc " %l[cc_label]" \ : : [var] "m" (_var), ## __VA_ARGS__ \ : clobbers : cc_label); \ if (0) { \ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 7ca5c9ec8b52..c57dd21155bd 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -157,6 +157,7 @@ static __always_inline void sev_es_nmi_complete(void) __sev_es_nmi_complete(); } extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); +extern void sev_enable(struct boot_params *bp); static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { @@ -195,19 +196,22 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd unsigned long npages); void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages); -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); +void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); +u64 snp_get_unsupported_features(u64 status); +u64 sev_get_status(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } static inline void sev_es_nmi_complete(void) { } static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } +static inline void sev_enable(struct boot_params *bp) { } static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } static inline void setup_ghcb(void) { } @@ -215,16 +219,19 @@ static inline void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } static inline void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } -static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { } static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } +static inline void snp_dmi_setup(void) { } static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { return -ENOTTY; } + +static inline u64 snp_get_unsupported_features(u64 status) { return 0; } +static inline u64 sev_get_status(void) { return 0; } #endif #endif diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index a800abb1a992..d8416b3bf832 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -12,11 +12,6 @@ /* image of the saved processor state */ struct saved_context { - /* - * On x86_32, all segment registers except gs are saved at kernel - * entry in pt_regs. - */ - u16 gs; unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; struct saved_msrs saved_msrs; @@ -27,6 +22,11 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + /* + * On x86_32, all segment registers except gs are saved at kernel + * entry in pt_regs. + */ + u16 gs; bool misc_enable_saved; } __attribute__((packed)); diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index fd2669b1cb2d..e3323a9dc911 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -58,12 +58,29 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); ,,regs->di,,regs->si,,regs->dx \ ,,regs->r10,,regs->r8,,regs->r9) \ + +/* SYSCALL_PT_ARGS is Adapted from s390x */ +#define SYSCALL_PT_ARG6(m, t1, t2, t3, t4, t5, t6) \ + SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5), m(t6, (regs->bp)) +#define SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5) \ + SYSCALL_PT_ARG4(m, t1, t2, t3, t4), m(t5, (regs->di)) +#define SYSCALL_PT_ARG4(m, t1, t2, t3, t4) \ + SYSCALL_PT_ARG3(m, t1, t2, t3), m(t4, (regs->si)) +#define SYSCALL_PT_ARG3(m, t1, t2, t3) \ + SYSCALL_PT_ARG2(m, t1, t2), m(t3, (regs->dx)) +#define SYSCALL_PT_ARG2(m, t1, t2) \ + SYSCALL_PT_ARG1(m, t1), m(t2, (regs->cx)) +#define SYSCALL_PT_ARG1(m, t1) m(t1, (regs->bx)) +#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__) + +#define __SC_COMPAT_CAST(t, a) \ + (__typeof(__builtin_choose_expr(__TYPE_IS_L(t), 0, 0U))) \ + (unsigned int)a + /* Mapping of registers to parameters for syscalls on i386 */ #define SC_IA32_REGS_TO_ARGS(x, ...) \ - __MAP(x,__SC_ARGS \ - ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \ - ,,(unsigned int)regs->dx,,(unsigned int)regs->si \ - ,,(unsigned int)regs->di,,(unsigned int)regs->bp) + SYSCALL_PT_ARGS(x, __SC_COMPAT_CAST, \ + __MAP(x, __SC_TYPE, __VA_ARGS__)) \ #define __SYS_STUB0(abi, name) \ long __##abi##_##name(const struct pt_regs *regs); \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 6ca0c661cb63..c638535eedd5 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -155,7 +155,7 @@ extern int __get_user_bad(void); #ifdef CONFIG_X86_32 #define __put_user_goto_u64(x, addr, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ _ASM_EXTABLE_UA(1b, %l2) \ @@ -317,7 +317,7 @@ do { \ } while (0) #define __get_user_asm(x, addr, itype, ltype, label) \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: mov"itype" %[umem],%[output]\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : [output] ltype(x) \ @@ -397,7 +397,7 @@ do { \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ @@ -416,7 +416,7 @@ do { \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ @@ -499,7 +499,7 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. */ #define __put_user_goto(x, addr, itype, ltype, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: mov"itype" %0,%1\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : : ltype(x), "m" (__m(addr)) \ diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 6c2e3ff3cb28..724ce44809ed 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -43,9 +43,9 @@ static inline int cpu_has_vmx(void) */ static inline int cpu_vmxoff(void) { - asm_volatile_goto("1: vmxoff\n\t" - _ASM_EXTABLE(1b, %l[fault]) - ::: "cc", "memory" : fault); + asm goto("1: vmxoff\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "cc", "memory" : fault); cr4_clear_bits(X86_CR4_VMXE); return 0; @@ -129,9 +129,9 @@ static inline void cpu_svm_disable(void) * case, GIF must already be set, otherwise the NMI would have * been blocked, so just eat the fault. */ - asm_volatile_goto("1: stgi\n\t" - _ASM_EXTABLE(1b, %l[fault]) - ::: "memory" : fault); + asm goto("1: stgi\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "memory" : fault); fault: wrmsrl(MSR_EFER, efer & ~EFER_SVME); } diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index ab60a71a8dcb..472f0263dbc6 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -4,6 +4,7 @@ #include #include +#include #ifdef CONFIG_X86_VSYSCALL_EMULATION extern void map_vsyscall(void); @@ -24,4 +25,13 @@ static inline bool emulate_vsyscall(unsigned long error_code, } #endif +/* + * The (legacy) vsyscall page is the long page in the kernel portion + * of the address space that has user-accessible permissions. + */ +static inline bool is_vsyscall_vaddr(unsigned long vaddr) +{ + return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); +} + #endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 034e62838b28..c3e910b1d5a2 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -30,12 +30,13 @@ struct x86_init_mpparse { * @reserve_resources: reserve the standard resources for the * platform * @memory_setup: platform specific memory setup - * + * @dmi_setup: platform specific DMI setup */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); char *(*memory_setup)(void); + void (*dmi_setup)(void); }; /** diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 8d8752b44f11..ff8f25faca3d 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -20,7 +20,7 @@ bool cpc_supported_by_cpu(void) (boot_cpu_data.x86_model >= 0x20 && boot_cpu_data.x86_model <= 0x2f))) return true; else if (boot_cpu_data.x86 == 0x17 && - boot_cpu_data.x86_model >= 0x70 && boot_cpu_data.x86_model <= 0x7f) + boot_cpu_data.x86_model >= 0x30 && boot_cpu_data.x86_model <= 0x7f) return true; return boot_cpu_has(X86_FEATURE_CPPC); } diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 6b8c93989aa3..69f85e274611 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -536,6 +536,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } #ifdef CONFIG_RETHUNK + /* * Rewrite the compiler generated return thunk tail-calls. * @@ -551,14 +552,18 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) { int i = 0; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - return -1; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (x86_return_thunk == __x86_return_thunk) + return -1; - bytes[i++] = RET_INSN_OPCODE; + i = JMP32_INSN_SIZE; + __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i); + } else { + bytes[i++] = RET_INSN_OPCODE; + } for (; i < insn->length;) bytes[i++] = INT3_INSN_OPCODE; - return i; } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c1d09c8844d6..425092806f8f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -997,11 +997,11 @@ static bool cpu_has_zenbleed_microcode(void) u32 good_rev = 0; switch (boot_cpu_data.x86_model) { - case 0x30 ... 0x3f: good_rev = 0x0830107a; break; - case 0x60 ... 0x67: good_rev = 0x0860010b; break; - case 0x68 ... 0x6f: good_rev = 0x08608105; break; - case 0x70 ... 0x7f: good_rev = 0x08701032; break; - case 0xa0 ... 0xaf: good_rev = 0x08a00008; break; + case 0x30 ... 0x3f: good_rev = 0x0830107b; break; + case 0x60 ... 0x67: good_rev = 0x0860010c; break; + case 0x68 ... 0x6f: good_rev = 0x08608107; break; + case 0x70 ... 0x7f: good_rev = 0x08701033; break; + case 0xa0 ... 0xaf: good_rev = 0x08a00009; break; default: return false; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 13dffc43ded0..e3fec47a800b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -110,9 +110,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -/* Control MDS CPU buffer clear before returning to user space */ -DEFINE_STATIC_KEY_FALSE(mds_user_clear); -EXPORT_SYMBOL_GPL(mds_user_clear); /* Control MDS CPU buffer clear before idling (halt, mwait) */ DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); @@ -251,7 +248,7 @@ static void __init mds_select_mitigation(void) if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) mds_mitigation = MDS_MITIGATION_VMWERV; - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && (mds_nosmt || cpu_mitigations_auto_nosmt())) @@ -355,7 +352,7 @@ static void __init taa_select_mitigation(void) * For guests that can't determine whether the correct microcode is * present on host, enable the mitigation for UCODE_NEEDED as well. */ - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -423,7 +420,14 @@ static void __init mmio_select_mitigation(void) */ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM))) - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + + /* + * X86_FEATURE_CLEAR_CPU_BUF could be enabled by other VERW based + * mitigations, disable KVM-only mitigation in that case. + */ + if (boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) + static_branch_disable(&mmio_stale_data_clear); else static_branch_enable(&mmio_stale_data_clear); @@ -475,6 +479,57 @@ static int __init mmio_stale_data_parse_cmdline(char *str) } early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "Register File Data Sampling: " fmt + +enum rfds_mitigations { + RFDS_MITIGATION_OFF, + RFDS_MITIGATION_VERW, + RFDS_MITIGATION_UCODE_NEEDED, +}; + +/* Default mitigation for Register File Data Sampling */ +static enum rfds_mitigations rfds_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_RFDS) ? RFDS_MITIGATION_VERW : RFDS_MITIGATION_OFF; + +static const char * const rfds_strings[] = { + [RFDS_MITIGATION_OFF] = "Vulnerable", + [RFDS_MITIGATION_VERW] = "Mitigation: Clear Register File", + [RFDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", +}; + +static void __init rfds_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) { + rfds_mitigation = RFDS_MITIGATION_OFF; + return; + } + if (rfds_mitigation == RFDS_MITIGATION_OFF) + return; + + if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + else + rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; +} + +static __init int rfds_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!boot_cpu_has_bug(X86_BUG_RFDS)) + return 0; + + if (!strcmp(str, "off")) + rfds_mitigation = RFDS_MITIGATION_OFF; + else if (!strcmp(str, "on")) + rfds_mitigation = RFDS_MITIGATION_VERW; + + return 0; +} +early_param("reg_file_data_sampling", rfds_parse_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "" fmt @@ -483,12 +538,12 @@ static void __init md_clear_update_mitigation(void) if (cpu_mitigations_off()) return; - if (!static_key_enabled(&mds_user_clear)) + if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) goto out; /* - * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data - * mitigation, if necessary. + * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO + * Stale Data mitigation, if necessary. */ if (mds_mitigation == MDS_MITIGATION_OFF && boot_cpu_has_bug(X86_BUG_MDS)) { @@ -500,11 +555,19 @@ static void __init md_clear_update_mitigation(void) taa_mitigation = TAA_MITIGATION_VERW; taa_select_mitigation(); } - if (mmio_mitigation == MMIO_MITIGATION_OFF && - boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { + /* + * MMIO_MITIGATION_OFF is not checked here so that mmio_stale_data_clear + * gets updated correctly as per X86_FEATURE_CLEAR_CPU_BUF state. + */ + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { mmio_mitigation = MMIO_MITIGATION_VERW; mmio_select_mitigation(); } + if (rfds_mitigation == RFDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_RFDS)) { + rfds_mitigation = RFDS_MITIGATION_VERW; + rfds_select_mitigation(); + } out: if (boot_cpu_has_bug(X86_BUG_MDS)) pr_info("MDS: %s\n", mds_strings[mds_mitigation]); @@ -514,6 +577,8 @@ out: pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) pr_info("MMIO Stale Data: Unknown: No mitigations\n"); + if (boot_cpu_has_bug(X86_BUG_RFDS)) + pr_info("Register File Data Sampling: %s\n", rfds_strings[rfds_mitigation]); } static void __init md_clear_select_mitigation(void) @@ -521,11 +586,12 @@ static void __init md_clear_select_mitigation(void) mds_select_mitigation(); taa_select_mitigation(); mmio_select_mitigation(); + rfds_select_mitigation(); /* - * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update - * and print their mitigation after MDS, TAA and MMIO Stale Data - * mitigation selection is done. + * As these mitigations are inter-related and rely on VERW instruction + * to clear the microarchitural buffers, update and print their status + * after mitigation selection is done for each of these vulnerabilities. */ md_clear_update_mitigation(); } @@ -1288,19 +1354,21 @@ spectre_v2_user_select_mitigation(void) } /* - * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP + * If no STIBP, Intel enhanced IBRS is enabled, or SMT impossible, STIBP * is not required. * - * Enhanced IBRS also protects against cross-thread branch target + * Intel's Enhanced IBRS also protects against cross-thread branch target * injection in user-mode as the IBRS bit remains always set which * implicitly enables cross-thread protections. However, in legacy IBRS * mode, the IBRS bit is set only on kernel entry and cleared on return - * to userspace. This disables the implicit cross-thread protection, - * so allow for STIBP to be selected in that case. + * to userspace. AMD Automatic IBRS also does not protect userspace. + * These modes therefore disable the implicit cross-thread protection, + * so allow for STIBP to be selected in those cases. */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + !boot_cpu_has(X86_FEATURE_AUTOIBRS))) return; /* @@ -1330,9 +1398,9 @@ static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines", [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE", - [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", - [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", - [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines", [SPECTRE_V2_IBRS] = "Mitigation: IBRS", }; @@ -1401,7 +1469,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { - pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n", + pr_err("%s selected but CPU doesn't have Enhanced or Automatic IBRS. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -1586,8 +1654,12 @@ static void __init spectre_v2_select_mitigation(void) pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); if (spectre_v2_in_ibrs_mode(mode)) { - x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - update_spec_ctrl(x86_spec_ctrl_base); + if (boot_cpu_has(X86_FEATURE_AUTOIBRS)) { + msr_set_bit(MSR_EFER, _EFER_AUTOIBRS); + } else { + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + update_spec_ctrl(x86_spec_ctrl_base); + } } switch (mode) { @@ -1671,8 +1743,8 @@ static void __init spectre_v2_select_mitigation(void) /* * Retpoline protects the kernel, but doesn't protect firmware. IBRS * and Enhanced IBRS protect firmware too, so enable IBRS around - * firmware calls only when IBRS / Enhanced IBRS aren't otherwise - * enabled. + * firmware calls only when IBRS / Enhanced / Automatic IBRS aren't + * otherwise enabled. * * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because * the user might select retpoline on the kernel command line and if @@ -2502,74 +2574,74 @@ static const char * const l1tf_vmx_states[] = { static ssize_t l1tf_show_state(char *buf) { if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) - return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); + return sysfs_emit(buf, "%s\n", L1TF_DEFAULT_MSG); if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && sched_smt_active())) { - return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, - l1tf_vmx_states[l1tf_vmx_mitigation]); + return sysfs_emit(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation]); } - return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, - l1tf_vmx_states[l1tf_vmx_mitigation], - sched_smt_active() ? "vulnerable" : "disabled"); + return sysfs_emit(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); } static ssize_t itlb_multihit_show_state(char *buf) { if (!boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || !boot_cpu_has(X86_FEATURE_VMX)) - return sprintf(buf, "KVM: Mitigation: VMX unsupported\n"); + return sysfs_emit(buf, "KVM: Mitigation: VMX unsupported\n"); else if (!(cr4_read_shadow() & X86_CR4_VMXE)) - return sprintf(buf, "KVM: Mitigation: VMX disabled\n"); + return sysfs_emit(buf, "KVM: Mitigation: VMX disabled\n"); else if (itlb_multihit_kvm_mitigation) - return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); + return sysfs_emit(buf, "KVM: Mitigation: Split huge pages\n"); else - return sprintf(buf, "KVM: Vulnerable\n"); + return sysfs_emit(buf, "KVM: Vulnerable\n"); } #else static ssize_t l1tf_show_state(char *buf) { - return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); + return sysfs_emit(buf, "%s\n", L1TF_DEFAULT_MSG); } static ssize_t itlb_multihit_show_state(char *buf) { - return sprintf(buf, "Processor vulnerable\n"); + return sysfs_emit(buf, "Processor vulnerable\n"); } #endif static ssize_t mds_show_state(char *buf) { if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { - return sprintf(buf, "%s; SMT Host state unknown\n", - mds_strings[mds_mitigation]); + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + mds_strings[mds_mitigation]); } if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { - return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], - (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : - sched_smt_active() ? "mitigated" : "disabled")); + return sysfs_emit(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : + sched_smt_active() ? "mitigated" : "disabled")); } - return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], - sched_smt_active() ? "vulnerable" : "disabled"); + return sysfs_emit(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); } static ssize_t tsx_async_abort_show_state(char *buf) { if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) || (taa_mitigation == TAA_MITIGATION_OFF)) - return sprintf(buf, "%s\n", taa_strings[taa_mitigation]); + return sysfs_emit(buf, "%s\n", taa_strings[taa_mitigation]); if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { - return sprintf(buf, "%s; SMT Host state unknown\n", - taa_strings[taa_mitigation]); + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + taa_strings[taa_mitigation]); } - return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], - sched_smt_active() ? "vulnerable" : "disabled"); + return sysfs_emit(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); } static ssize_t mmio_stale_data_show_state(char *buf) @@ -2589,9 +2661,15 @@ static ssize_t mmio_stale_data_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t rfds_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]); +} + static char *stibp_state(void) { - if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + !boot_cpu_has(X86_FEATURE_AUTOIBRS)) return ""; switch (spectre_v2_user_stibp) { @@ -2637,47 +2715,46 @@ static char *pbrsb_eibrs_state(void) static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) - return sprintf(buf, "Vulnerable: LFENCE\n"); + return sysfs_emit(buf, "Vulnerable: LFENCE\n"); if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) - return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); + return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); if (sched_smt_active() && unprivileged_ebpf_enabled() && spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) - return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); + return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sprintf(buf, "%s%s%s%s%s%s%s\n", - spectre_v2_strings[spectre_v2_enabled], - ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", - pbrsb_eibrs_state(), - spectre_v2_module_string()); + return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + stibp_state(), + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + pbrsb_eibrs_state(), + spectre_v2_module_string()); } static ssize_t srbds_show_state(char *buf) { - return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); + return sysfs_emit(buf, "%s\n", srbds_strings[srbds_mitigation]); } static ssize_t retbleed_show_state(char *buf) { if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET || retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) - return sprintf(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n"); + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return sysfs_emit(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n"); - return sprintf(buf, "%s; SMT %s\n", - retbleed_strings[retbleed_mitigation], - !sched_smt_active() ? "disabled" : - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? - "enabled with STIBP protection" : "vulnerable"); + return sysfs_emit(buf, "%s; SMT %s\n", retbleed_strings[retbleed_mitigation], + !sched_smt_active() ? "disabled" : + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? + "enabled with STIBP protection" : "vulnerable"); } - return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); + return sysfs_emit(buf, "%s\n", retbleed_strings[retbleed_mitigation]); } static ssize_t gds_show_state(char *buf) @@ -2699,26 +2776,26 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr char *buf, unsigned int bug) { if (!boot_cpu_has_bug(bug)) - return sprintf(buf, "Not affected\n"); + return sysfs_emit(buf, "Not affected\n"); switch (bug) { case X86_BUG_CPU_MELTDOWN: if (boot_cpu_has(X86_FEATURE_PTI)) - return sprintf(buf, "Mitigation: PTI\n"); + return sysfs_emit(buf, "Mitigation: PTI\n"); if (hypervisor_is_type(X86_HYPER_XEN_PV)) - return sprintf(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n"); + return sysfs_emit(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n"); break; case X86_BUG_SPECTRE_V1: - return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); + return sysfs_emit(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: return spectre_v2_show_state(buf); case X86_BUG_SPEC_STORE_BYPASS: - return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); + return sysfs_emit(buf, "%s\n", ssb_strings[ssb_mode]); case X86_BUG_L1TF: if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) @@ -2750,11 +2827,14 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRSO: return srso_show_state(buf); + case X86_BUG_RFDS: + return rfds_show_state(buf); + default: break; } - return sprintf(buf, "Vulnerable\n"); + return sysfs_emit(buf, "Vulnerable\n"); } ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) @@ -2824,4 +2904,9 @@ ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribut { return cpu_show_common(dev, attr, buf, X86_BUG_SRSO); } + +ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_RFDS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 454cdf341862..ca243d7ba0ea 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1212,8 +1212,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), /* Zhaoxin Family 7 */ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), @@ -1248,6 +1248,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define SRSO BIT(5) /* CPU is affected by GDS */ #define GDS BIT(6) +/* CPU is affected by Register File Data Sampling */ +#define RFDS BIT(7) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1275,9 +1277,18 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE_N, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS), VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), @@ -1311,6 +1322,24 @@ static bool arch_cap_mmio_immune(u64 ia32_cap) ia32_cap & ARCH_CAP_SBDR_SSDP_NO); } +static bool __init vulnerable_to_rfds(u64 ia32_cap) +{ + /* The "immunity" bit trumps everything else: */ + if (ia32_cap & ARCH_CAP_RFDS_NO) + return false; + + /* + * VMMs set ARCH_CAP_RFDS_CLEAR for processors not in the blacklist to + * indicate that mitigation is needed because guest is running on a + * vulnerable hardware or may migrate to such hardware: + */ + if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + return true; + + /* Only consult the blacklist when there is no enumeration: */ + return cpu_matches(cpu_vuln_blacklist, RFDS); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -1333,8 +1362,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); - if (ia32_cap & ARCH_CAP_IBRS_ALL) + /* + * AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature + * flag and protect from vendor-specific bugs via the whitelist. + */ + if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); + if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && + !(ia32_cap & ARCH_CAP_PBRSB_NO)) + setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + } if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { @@ -1396,11 +1433,6 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_RETBLEED); } - if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) && - !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && - !(ia32_cap & ARCH_CAP_PBRSB_NO)) - setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); - if (cpu_matches(cpu_vuln_blacklist, SMT_RSB)) setup_force_cpu_bug(X86_BUG_SMT_RSB); @@ -1419,6 +1451,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SRSO); } + if (vulnerable_to_rfds(ia32_cap)) + setup_force_cpu_bug(X86_BUG_RFDS); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 427899650483..32bd64017047 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -216,6 +216,90 @@ int intel_cpu_collect_info(struct ucode_cpu_info *uci) } EXPORT_SYMBOL_GPL(intel_cpu_collect_info); +#define MSR_IA32_TME_ACTIVATE 0x982 + +/* Helpers to access TME_ACTIVATE MSR */ +#define TME_ACTIVATE_LOCKED(x) (x & 0x1) +#define TME_ACTIVATE_ENABLED(x) (x & 0x2) + +#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ +#define TME_ACTIVATE_POLICY_AES_XTS_128 0 + +#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ + +#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ +#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 + +/* Values for mktme_status (SW only construct) */ +#define MKTME_ENABLED 0 +#define MKTME_DISABLED 1 +#define MKTME_UNINITIALIZED 2 +static int mktme_status = MKTME_UNINITIALIZED; + +static void detect_tme_early(struct cpuinfo_x86 *c) +{ + u64 tme_activate, tme_policy, tme_crypto_algs; + int keyid_bits = 0, nr_keyids = 0; + static u64 tme_activate_cpu0 = 0; + + rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); + + if (mktme_status != MKTME_UNINITIALIZED) { + if (tme_activate != tme_activate_cpu0) { + /* Broken BIOS? */ + pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); + pr_err_once("x86/tme: MKTME is not usable\n"); + mktme_status = MKTME_DISABLED; + + /* Proceed. We may need to exclude bits from x86_phys_bits. */ + } + } else { + tme_activate_cpu0 = tme_activate; + } + + if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { + pr_info_once("x86/tme: not enabled by BIOS\n"); + mktme_status = MKTME_DISABLED; + return; + } + + if (mktme_status != MKTME_UNINITIALIZED) + goto detect_keyid_bits; + + pr_info("x86/tme: enabled by BIOS\n"); + + tme_policy = TME_ACTIVATE_POLICY(tme_activate); + if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) + pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); + + tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); + if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { + pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", + tme_crypto_algs); + mktme_status = MKTME_DISABLED; + } +detect_keyid_bits: + keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); + nr_keyids = (1UL << keyid_bits) - 1; + if (nr_keyids) { + pr_info_once("x86/mktme: enabled by BIOS\n"); + pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); + } else { + pr_info_once("x86/mktme: disabled by BIOS\n"); + } + + if (mktme_status == MKTME_UNINITIALIZED) { + /* MKTME is usable */ + mktme_status = MKTME_ENABLED; + } + + /* + * KeyID bits effectively lower the number of physical address + * bits. Update cpuinfo_x86::x86_phys_bits accordingly. + */ + c->x86_phys_bits -= keyid_bits; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -367,6 +451,13 @@ static void early_init_intel(struct cpuinfo_x86 *c) */ if (detect_extended_topology_early(c) < 0) detect_ht_early(c); + + /* + * Adjust the number of physical bits early because it affects the + * valid bits of the MTRR mask registers. + */ + if (cpu_has(c, X86_FEATURE_TME)) + detect_tme_early(c); } static void bsp_init_intel(struct cpuinfo_x86 *c) @@ -527,90 +618,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -#define MSR_IA32_TME_ACTIVATE 0x982 - -/* Helpers to access TME_ACTIVATE MSR */ -#define TME_ACTIVATE_LOCKED(x) (x & 0x1) -#define TME_ACTIVATE_ENABLED(x) (x & 0x2) - -#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ -#define TME_ACTIVATE_POLICY_AES_XTS_128 0 - -#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ - -#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ -#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 - -/* Values for mktme_status (SW only construct) */ -#define MKTME_ENABLED 0 -#define MKTME_DISABLED 1 -#define MKTME_UNINITIALIZED 2 -static int mktme_status = MKTME_UNINITIALIZED; - -static void detect_tme(struct cpuinfo_x86 *c) -{ - u64 tme_activate, tme_policy, tme_crypto_algs; - int keyid_bits = 0, nr_keyids = 0; - static u64 tme_activate_cpu0 = 0; - - rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); - - if (mktme_status != MKTME_UNINITIALIZED) { - if (tme_activate != tme_activate_cpu0) { - /* Broken BIOS? */ - pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); - pr_err_once("x86/tme: MKTME is not usable\n"); - mktme_status = MKTME_DISABLED; - - /* Proceed. We may need to exclude bits from x86_phys_bits. */ - } - } else { - tme_activate_cpu0 = tme_activate; - } - - if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { - pr_info_once("x86/tme: not enabled by BIOS\n"); - mktme_status = MKTME_DISABLED; - return; - } - - if (mktme_status != MKTME_UNINITIALIZED) - goto detect_keyid_bits; - - pr_info("x86/tme: enabled by BIOS\n"); - - tme_policy = TME_ACTIVATE_POLICY(tme_activate); - if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) - pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); - - tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); - if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { - pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", - tme_crypto_algs); - mktme_status = MKTME_DISABLED; - } -detect_keyid_bits: - keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); - nr_keyids = (1UL << keyid_bits) - 1; - if (nr_keyids) { - pr_info_once("x86/mktme: enabled by BIOS\n"); - pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); - } else { - pr_info_once("x86/mktme: disabled by BIOS\n"); - } - - if (mktme_status == MKTME_UNINITIALIZED) { - /* MKTME is usable */ - mktme_status = MKTME_ENABLED; - } - - /* - * KeyID bits effectively lower the number of physical address - * bits. Update cpuinfo_x86::x86_phys_bits accordingly. - */ - c->x86_phys_bits -= keyid_bits; -} - static void init_cpuid_fault(struct cpuinfo_x86 *c) { u64 msr; @@ -747,9 +754,6 @@ static void init_intel(struct cpuinfo_x86 *c) init_ia32_feat_ctl(c); - if (cpu_has(c, X86_FEATURE_TME)) - detect_tme(c); - init_intel_misc_features(c); split_lock_init(); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index f1a748da5fab..cad6ea1911e9 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -239,6 +240,7 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) struct llist_node *pending; struct mce_evt_llist *l; int apei_err = 0; + struct page *p; /* * Allow instrumentation around external facilities usage. Not that it @@ -292,6 +294,20 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) if (!fake_panic) { if (panic_timeout == 0) panic_timeout = mca_cfg.panic_timeout; + + /* + * Kdump skips the poisoned page in order to avoid + * touching the error bits again. Poison the page even + * if the error is fatal and the machine is about to + * panic. + */ + if (kexec_crash_loaded()) { + if (final && (final->status & MCI_STATUS_ADDRV)) { + p = pfn_to_online_page(final->addr >> PAGE_SHIFT); + if (p) + SetPageHWPoison(p); + } + } panic(msg); } else pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 34d9e899e471..9b039e9635e4 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -344,7 +344,7 @@ static void __init ms_hyperv_init_platform(void) /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE) - cc_set_vendor(CC_VENDOR_HYPERV); + cc_vendor = CC_VENDOR_HYPERV; } } diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 0b5c6c76f6f7..4761d489a117 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -281,14 +281,10 @@ struct rftype { * struct mbm_state - status for each MBM counter in each domain * @prev_bw_bytes: Previous bytes value read for bandwidth calculation * @prev_bw: The most recent bandwidth in MBps - * @delta_bw: Difference between the current and previous bandwidth - * @delta_comp: Indicates whether to compute the delta_bw */ struct mbm_state { u64 prev_bw_bytes; u32 prev_bw; - u32 delta_bw; - bool delta_comp; }; /** diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 77538abeb72a..b9adb707750c 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -428,9 +428,6 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) cur_bw = bytes / SZ_1M; - if (m->delta_comp) - m->delta_bw = abs(cur_bw - m->prev_bw); - m->delta_comp = false; m->prev_bw = cur_bw; } @@ -508,11 +505,11 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) { u32 closid, rmid, cur_msr_val, new_msr_val; struct mbm_state *pmbm_data, *cmbm_data; - u32 cur_bw, delta_bw, user_bw; struct rdt_resource *r_mba; struct rdt_domain *dom_mba; struct list_head *head; struct rdtgroup *entry; + u32 cur_bw, user_bw; if (!is_mbm_local_enabled()) return; @@ -531,7 +528,6 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) cur_bw = pmbm_data->prev_bw; user_bw = dom_mba->mbps_val[closid]; - delta_bw = pmbm_data->delta_bw; /* MBA resource doesn't support CDP */ cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE); @@ -543,49 +539,31 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) list_for_each_entry(entry, head, mon.crdtgrp_list) { cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; cur_bw += cmbm_data->prev_bw; - delta_bw += cmbm_data->delta_bw; } /* * Scale up/down the bandwidth linearly for the ctrl group. The * bandwidth step is the bandwidth granularity specified by the * hardware. - * - * The delta_bw is used when increasing the bandwidth so that we - * dont alternately increase and decrease the control values - * continuously. - * - * For ex: consider cur_bw = 90MBps, user_bw = 100MBps and if - * bandwidth step is 20MBps(> user_bw - cur_bw), we would keep - * switching between 90 and 110 continuously if we only check - * cur_bw < user_bw. + * Always increase throttling if current bandwidth is above the + * target set by user. + * But avoid thrashing up and down on every poll by checking + * whether a decrease in throttling is likely to push the group + * back over target. E.g. if currently throttling to 30% of bandwidth + * on a system with 10% granularity steps, check whether moving to + * 40% would go past the limit by multiplying current bandwidth by + * "(30 + 10) / 30". */ if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) { new_msr_val = cur_msr_val - r_mba->membw.bw_gran; } else if (cur_msr_val < MAX_MBA_BW && - (user_bw > (cur_bw + delta_bw))) { + (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) { new_msr_val = cur_msr_val + r_mba->membw.bw_gran; } else { return; } resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val); - - /* - * Delta values are updated dynamically package wise for each - * rdtgrp every time the throttle MSR changes value. - * - * This is because (1)the increase in bandwidth is not perfectly - * linear and only "approximately" linear even when the hardware - * says it is linear.(2)Also since MBA is a core specific - * mechanism, the delta values vary based on number of cores used - * by the rdtgrp. - */ - pmbm_data->delta_comp = true; - list_for_each_entry(entry, head, mon.crdtgrp_list) { - cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; - cmbm_data->delta_comp = true; - } } static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9dac24680ff8..993734e96615 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void) e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); /* - * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need - * to be reserved. + * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by + * kexec and do not need to be reserved. */ - if (data->type != SETUP_EFI && data->type != SETUP_IMA) + if (data->type != SETUP_EFI && + data->type != SETUP_IMA && + data->type != SETUP_RNG_SEED) e820__range_update_kexec(pa_data, sizeof(*data) + data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c index e963344b0449..53935b4d62e3 100644 --- a/arch/x86/kernel/eisa.c +++ b/arch/x86/kernel/eisa.c @@ -2,6 +2,7 @@ /* * EISA specific code */ +#include #include #include #include @@ -12,7 +13,7 @@ static __init int eisa_bus_probe(void) { void __iomem *p; - if (xen_pv_domain() && !xen_initial_domain()) + if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return 0; p = ioremap(0x0FFFD9, 4); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 558076dbde5b..247f2225aa9f 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -274,12 +274,13 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures, * Attempt to restore the FPU registers directly from user memory. * Pagefaults are handled and any errors returned are fatal. */ -static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, - bool fx_only, unsigned int size) +static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) { struct fpu *fpu = ¤t->thread.fpu; int ret; + /* Restore enabled features only. */ + xrestore &= fpu->fpstate->user_xfeatures; retry: fpregs_lock(); /* Ensure that XFD is up to date */ @@ -309,7 +310,7 @@ retry: if (ret != X86_TRAP_PF) return false; - if (!fault_in_readable(buf, size)) + if (!fault_in_readable(buf, fpu->fpstate->user_size)) goto retry; return false; } @@ -339,7 +340,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, struct user_i387_ia32_struct env; bool success, fx_only = false; union fpregs_state *fpregs; - unsigned int state_size; u64 user_xfeatures = 0; if (use_xsave()) { @@ -349,17 +349,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, return false; fx_only = !fx_sw_user.magic1; - state_size = fx_sw_user.xstate_size; user_xfeatures = fx_sw_user.xfeatures; } else { user_xfeatures = XFEATURE_MASK_FPSSE; - state_size = fpu->fpstate->user_size; } if (likely(!ia32_fxstate)) { /* Restore the FPU registers directly from user memory. */ - return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, - state_size); + return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only); } /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index ebe698f8af73..2aa849705bb6 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -177,10 +177,11 @@ void fpu__init_cpu_xstate(void) * Must happen after CR4 setup and before xsetbv() to allow KVM * lazy passthrough. Write independent of the dynamic state static * key as that does not work on the boot CPU. This also ensures - * that any stale state is wiped out from XFD. + * that any stale state is wiped out from XFD. Reset the per CPU + * xfd cache too. */ if (cpu_feature_enabled(X86_FEATURE_XFD)) - wrmsrl(MSR_IA32_XFD, init_fpstate.xfd); + xfd_set_state(init_fpstate.xfd); /* * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 3518fb26d06b..19ca623ffa2a 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -148,20 +148,26 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rs #endif #ifdef CONFIG_X86_64 +static inline void xfd_set_state(u64 xfd) +{ + wrmsrl(MSR_IA32_XFD, xfd); + __this_cpu_write(xfd_state, xfd); +} + static inline void xfd_update_state(struct fpstate *fpstate) { if (fpu_state_size_dynamic()) { u64 xfd = fpstate->xfd; - if (__this_cpu_read(xfd_state) != xfd) { - wrmsrl(MSR_IA32_XFD, xfd); - __this_cpu_write(xfd_state, xfd); - } + if (__this_cpu_read(xfd_state) != xfd) + xfd_set_state(xfd); } } extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu); #else +static inline void xfd_set_state(u64 xfd) { } + static inline void xfd_update_state(struct fpstate *fpstate) { } static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index e07234ec7e23..ec51ce713dea 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -361,7 +361,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = trampoline + size; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE); + __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else memcpy(ip, retq, sizeof(retq)); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 71f336425e58..54732da52dc1 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1436,7 +1436,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) memset(&curr_time, 0, sizeof(struct rtc_time)); if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) { - if (unlikely(mc146818_get_time(&curr_time) < 0)) { + if (unlikely(mc146818_get_time(&curr_time, 10) < 0)) { pr_err_ratelimited("unable to read current time from RTC\n"); return IRQ_HANDLED; } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6120f25b0d5c..991f00c817e6 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -301,7 +301,16 @@ static int can_probe(unsigned long paddr) kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - if (is_endbr(*(u32 *)addr)) { + u32 insn; + + /* + * Since 'addr' is not guaranteed to be safe to access, use + * copy_from_kernel_nofault() to read the instruction: + */ + if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32))) + return NULL; + + if (is_endbr(insn)) { *on_func_entry = !offset || offset == 4; if (*on_func_entry) offset = 4; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index cec0bfa3bc04..ed6cce6c3950 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -522,9 +522,6 @@ nmi_restart: write_cr2(this_cpu_read(nmi_cr2)); if (this_cpu_dec_return(nmi_state)) goto nmi_restart; - - if (user_mode(regs)) - mds_user_clear_cpu_buffers(); } #if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL) diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 319fef37d9dc..cc2c34ba7228 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -203,16 +203,6 @@ void __init probe_roms(void) unsigned char c; int i; - /* - * The ROM memory range is not part of the e820 table and is therefore not - * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted - * memory, and SNP requires encrypted memory to be validated before access. - * Do that here. - */ - snp_prep_memory(video_rom_resource.start, - ((system_rom_resource.end + 1) - video_rom_resource.start), - SNP_PAGE_STATE_PRIVATE); - /* video rom */ upper = adapter_rom_resources[0].start; for (start = video_rom_resource.start; start < upper; start += 2048) { diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 349046434513..344c141133e4 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -67,7 +67,7 @@ void mach_get_cmos_time(struct timespec64 *now) return; } - if (mc146818_get_time(&tm)) { + if (mc146818_get_time(&tm, 1000)) { pr_err("Unable to read current time from RTC\n"); now->tv_sec = now->tv_nsec = 0; return; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 804a252382da..d1ffac9ad611 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -1032,7 +1031,7 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled(EFI_BOOT)) efi_init(); - dmi_setup(); + x86_init.resources.dmi_setup(); /* * VMware detection requires dmi to be available, so this diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 71d8698702ce..271e70d5748e 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -553,9 +553,9 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0; /* Skip post-processing for out-of-range zero leafs. */ - if (!(leaf->fn <= cpuid_std_range_max || - (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) || - (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max))) + if (!(leaf->fn <= RIP_REL_REF(cpuid_std_range_max) || + (leaf->fn >= 0x40000000 && leaf->fn <= RIP_REL_REF(cpuid_hyp_range_max)) || + (leaf->fn >= 0x80000000 && leaf->fn <= RIP_REL_REF(cpuid_ext_range_max)))) return 0; } @@ -1060,10 +1060,10 @@ static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; if (fn->eax_in == 0x0) - cpuid_std_range_max = fn->eax; + RIP_REL_REF(cpuid_std_range_max) = fn->eax; else if (fn->eax_in == 0x40000000) - cpuid_hyp_range_max = fn->eax; + RIP_REL_REF(cpuid_hyp_range_max) = fn->eax; else if (fn->eax_in == 0x80000000) - cpuid_ext_range_max = fn->eax; + RIP_REL_REF(cpuid_ext_range_max) = fn->eax; } } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index c8dfb0fdde7f..e35fcc8d4bae 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -736,7 +737,7 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd * This eliminates worries about jump tables or checking boot_cpu_data * in the cc_platform_has() function. */ - if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) return; /* @@ -758,7 +759,7 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr * This eliminates worries about jump tables or checking boot_cpu_data * in the cc_platform_has() function. */ - if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) return; /* Invalidate the memory pages before they are marked shared in the RMP table. */ @@ -768,21 +769,6 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr early_set_pages_state(paddr, npages, SNP_PAGE_STATE_SHARED); } -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) -{ - unsigned long vaddr, npages; - - vaddr = (unsigned long)__va(paddr); - npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; - - if (op == SNP_PAGE_STATE_PRIVATE) - early_snp_set_memory_private(vaddr, paddr, npages); - else if (op == SNP_PAGE_STATE_SHARED) - early_snp_set_memory_shared(vaddr, paddr, npages); - else - WARN(1, "invalid memory op %d\n", op); -} - static int vmgexit_psc(struct snp_psc_desc *desc) { int cur_entry, end_entry, ret = 0; @@ -2152,6 +2138,17 @@ void __init __noreturn snp_abort(void) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } +/* + * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are + * enabled, as the alternative (fallback) logic for DMI probing in the legacy + * ROM region can cause a crash since this region is not pre-validated. + */ +void __init snp_dmi_setup(void) +{ + if (efi_enabled(EFI_CONFIG_TABLES)) + dmi_setup(); +} + static void dump_cpuid_table(void) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 3fbb49168827..b32134b093ec 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -80,7 +80,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, case RET: if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); + code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk); else code = &retinsn; break; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 41e5b4cb898c..a4a921b9e664 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -3,6 +3,7 @@ * * For licencing details see kernel-base/COPYING */ +#include #include #include #include @@ -66,6 +67,7 @@ struct x86_init_ops x86_init __initdata = { .probe_roms = probe_roms, .reserve_resources = reserve_standard_io_resources, .memory_setup = e820__memory_setup_default, + .dmi_setup = dmi_setup, }, .mpparse = { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c3ef1fc602bf..62a44455c51d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -535,9 +535,9 @@ static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) } static __always_inline -void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask) +void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask) { - /* Use kvm_cpu_cap_mask for non-scattered leafs. */ + /* Use kvm_cpu_cap_mask for leafs that aren't KVM-only. */ BUILD_BUG_ON(leaf < NCAPINTS); kvm_cpu_caps[leaf] = mask; @@ -547,7 +547,7 @@ void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask) static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) { - /* Use kvm_cpu_cap_init_scattered for scattered leafs. */ + /* Use kvm_cpu_cap_init_kvm_defined for KVM-only leafs. */ BUILD_BUG_ON(leaf >= NCAPINTS); kvm_cpu_caps[leaf] &= mask; @@ -652,11 +652,16 @@ void kvm_set_cpu_caps(void) F(AVX_VNNI) | F(AVX512_BF16) ); + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, + F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | F(DDPD_U) | + F(BHI_CTRL) | F(MCDT_NO) + ); + kvm_cpu_cap_mask(CPUID_D_1_EAX, F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd ); - kvm_cpu_cap_init_scattered(CPUID_12_EAX, + kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX, SF(SGX1) | SF(SGX2) ); @@ -902,13 +907,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) break; /* function 7 has additional index. */ case 7: - entry->eax = min(entry->eax, 1u); + max_idx = entry->eax = min(entry->eax, 2u); cpuid_entry_override(entry, CPUID_7_0_EBX); cpuid_entry_override(entry, CPUID_7_ECX); cpuid_entry_override(entry, CPUID_7_EDX); - /* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */ - if (entry->eax == 1) { + /* KVM only supports up to 0x7.2, capped above via min(). */ + if (max_idx >= 1) { entry = do_host_cpuid(array, function, 1); if (!entry) goto out; @@ -918,6 +923,16 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ecx = 0; entry->edx = 0; } + if (max_idx >= 2) { + entry = do_host_cpuid(array, function, 2); + if (!entry) + goto out; + + cpuid_entry_override(entry, CPUID_7_2_EDX); + entry->ecx = 0; + entry->ebx = 0; + entry->eax = 0; + } break; case 0xa: { /* Architectural Performance Monitoring */ union cpuid10_eax eax; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index edcf45e312b9..bfeafe485552 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -40,6 +40,7 @@ #include "ioapic.h" #include "trace.h" #include "x86.h" +#include "xen.h" #include "cpuid.h" #include "hyperv.h" @@ -338,8 +339,10 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) } /* Check if there are APF page ready requests pending */ - if (enabled) + if (enabled) { kvm_make_request(KVM_REQ_APF_READY, apic->vcpu); + kvm_xen_sw_enable_lapic(apic->vcpu); + } } static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 7eeade35a425..7c8e2b20a13b 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -7,23 +7,44 @@ #include /* - * Hardware-defined CPUID leafs that are scattered in the kernel, but need to - * be directly used by KVM. Note, these word values conflict with the kernel's - * "bug" caps, but KVM doesn't use those. + * Hardware-defined CPUID leafs that are either scattered by the kernel or are + * unknown to the kernel, but need to be directly used by KVM. Note, these + * word values conflict with the kernel's "bug" caps, but KVM doesn't use those. */ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, + CPUID_7_2_EDX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, }; +/* + * Define a KVM-only feature flag. + * + * For features that are scattered by cpufeatures.h, __feature_translate() also + * needs to be updated to translate the kernel-defined feature into the + * KVM-defined feature. + * + * For features that are 100% KVM-only, i.e. not defined by cpufeatures.h, + * forego the intermediate KVM_X86_FEATURE and directly define X86_FEATURE_* so + * that X86_FEATURE_* can be used in KVM. No __feature_translate() handling is + * needed in this case. + */ #define KVM_X86_FEATURE(w, f) ((w)*32 + (f)) /* Intel-defined SGX sub-features, CPUID level 0x12 (EAX). */ #define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0) #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1) +/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */ +#define X86_FEATURE_INTEL_PSFD KVM_X86_FEATURE(CPUID_7_2_EDX, 0) +#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1) +#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) +#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3) +#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) +#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) + struct cpuid_reg { u32 function; u32 index; @@ -49,6 +70,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX}, [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX}, + [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, }; /* @@ -75,12 +97,16 @@ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) */ static __always_inline u32 __feature_translate(int x86_feature) { - if (x86_feature == X86_FEATURE_SGX1) - return KVM_X86_FEATURE_SGX1; - else if (x86_feature == X86_FEATURE_SGX2) - return KVM_X86_FEATURE_SGX2; +#define KVM_X86_TRANSLATE_FEATURE(f) \ + case X86_FEATURE_##f: return KVM_X86_FEATURE_##f - return x86_feature; + switch (x86_feature) { + KVM_X86_TRANSLATE_FEATURE(SGX1); + KVM_X86_TRANSLATE_FEATURE(SGX2); + KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); + default: + return x86_feature; + } } static __always_inline u32 __feature_leaf(int x86_feature) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index bc288e6bde64..5d4d78c9a787 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -239,18 +239,6 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size) kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1); } -static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl) -{ - /* Nested FLUSHBYASID is not supported yet. */ - switch(tlb_ctl) { - case TLB_CONTROL_DO_NOTHING: - case TLB_CONTROL_FLUSH_ALL_ASID: - return true; - default: - return false; - } -} - static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu, struct vmcb_ctrl_area_cached *control) { @@ -270,8 +258,6 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu, IOPM_SIZE))) return false; - if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl))) - return false; return true; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 3060fe4e9731..3dc0ee1fe9db 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1958,20 +1958,22 @@ int sev_mem_enc_register_region(struct kvm *kvm, goto e_free; } + /* + * The guest may change the memory encryption attribute from C=0 -> C=1 + * or vice versa for this memory range. Lets make sure caches are + * flushed to ensure that guest data gets written into memory with + * correct C-bit. Note, this must be done before dropping kvm->lock, + * as region and its array of pages can be freed by a different task + * once kvm->lock is released. + */ + sev_clflush_pages(region->pages, region->npages); + region->uaddr = range->addr; region->size = range->size; list_add_tail(®ion->list, &sev->regions_list); mutex_unlock(&kvm->lock); - /* - * The guest may change the memory encryption attribute from C=0 -> C=1 - * or vice versa for this memory range. Lets make sure caches are - * flushed to ensure that guest data gets written into memory with - * correct C-bit. - */ - sev_clflush_pages(region->pages, region->npages); - return ret; e_free: diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h index 36c8af87a707..4e725854c63a 100644 --- a/arch/x86/kvm/svm/svm_ops.h +++ b/arch/x86/kvm/svm/svm_ops.h @@ -8,7 +8,7 @@ #define svm_asm(insn, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) "\n\t" \ + asm goto("1: " __stringify(insn) "\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ ::: clobber : fault); \ return; \ @@ -18,7 +18,7 @@ fault: \ #define svm_asm1(insn, op1, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + asm goto("1: " __stringify(insn) " %0\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ :: op1 : clobber : fault); \ return; \ @@ -28,7 +28,7 @@ fault: \ #define svm_asm2(insn, op1, op2, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + asm goto("1: " __stringify(insn) " %1, %0\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ :: op1, op2 : clobber : fault); \ return; \ diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 9a75a0d5deae..220cdbe1e286 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -38,7 +38,7 @@ static int fixed_pmc_events[] = {1, 0, 7}; static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) { struct kvm_pmc *pmc; - u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl; + u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl; int i; pmu->fixed_ctr_ctrl = data; diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h index edc3f16cc189..6a9bfdfbb6e5 100644 --- a/arch/x86/kvm/vmx/run_flags.h +++ b/arch/x86/kvm/vmx/run_flags.h @@ -2,7 +2,10 @@ #ifndef __KVM_X86_VMX_RUN_FLAGS_H #define __KVM_X86_VMX_RUN_FLAGS_H -#define VMX_RUN_VMRESUME (1 << 0) -#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) +#define VMX_RUN_VMRESUME_SHIFT 0 +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1 + +#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT) +#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT) #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0b5db4de4d09..0b2cad66dee1 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -106,7 +106,7 @@ SYM_FUNC_START(__vmx_vcpu_run) mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - testb $VMX_RUN_VMRESUME, %bl + bt $VMX_RUN_VMRESUME_SHIFT, %bx /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX @@ -128,8 +128,11 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX - /* Check EFLAGS.ZF from 'testb' above */ - jz .Lvmlaunch + /* Clobbers EFLAGS.ZF */ + CLEAR_CPU_BUFFERS + + /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */ + jnc .Lvmlaunch /* * After a successful VMRESUME/VMLAUNCH, control flow "magically" diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 98d732b9418f..5c1590855ffc 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -407,7 +407,8 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { - vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) && + vmx_fb_clear_ctrl_available; /* * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS @@ -2469,10 +2470,10 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer) cr4_set_bits(X86_CR4_VMXE); - asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t" - _ASM_EXTABLE(1b, %l[fault]) - : : [vmxon_pointer] "m"(vmxon_pointer) - : : fault); + asm goto("1: vmxon %[vmxon_pointer]\n\t" + _ASM_EXTABLE(1b, %l[fault]) + : : [vmxon_pointer] "m"(vmxon_pointer) + : : fault); return 0; fault: @@ -7120,11 +7121,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, { guest_state_enter_irqoff(); - /* L1D Flush includes CPU buffer clear to mitigate MDS */ + /* + * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW + * mitigation for MDS is done late in VMentry and is still + * executed in spite of L1D Flush. This is because an extra VERW + * should not matter much after the big hammer L1D Flush. + */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); - else if (static_branch_unlikely(&mds_user_clear)) - mds_clear_cpu_buffers(); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) mds_clear_cpu_buffers(); diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index ec268df83ed6..5edab28dfb2e 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -73,7 +73,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT - asm_volatile_goto("1: vmread %[field], %[output]\n\t" + asm_goto_output("1: vmread %[field], %[output]\n\t" "jna %l[do_fail]\n\t" _ASM_EXTABLE(1b, %l[do_exception]) @@ -166,7 +166,7 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) #define vmx_asm1(insn, op1, error_args...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + asm goto("1: " __stringify(insn) " %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ @@ -183,7 +183,7 @@ fault: \ #define vmx_asm2(insn, op1, op2, error_args...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + asm goto("1: " __stringify(insn) " %1, %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7144e5166813..0e6e63a8f094 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1613,7 +1613,8 @@ static unsigned int num_msr_based_features; ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \ ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ - ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO) + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) static u64 kvm_get_arch_capabilities(void) { @@ -1650,6 +1651,8 @@ static u64 kvm_get_arch_capabilities(void) data |= ARCH_CAP_SSB_NO; if (!boot_cpu_has_bug(X86_BUG_MDS)) data |= ARCH_CAP_MDS_NO; + if (!boot_cpu_has_bug(X86_BUG_RFDS)) + data |= ARCH_CAP_RFDS_NO; if (!boot_cpu_has(X86_FEATURE_RTM)) { /* @@ -7755,6 +7758,16 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, if (r < 0) return X86EMUL_UNHANDLEABLE; + + /* + * Mark the page dirty _before_ checking whether or not the CMPXCHG was + * successful, as the old value is written back on failure. Note, for + * live migration, this is unnecessarily conservative as CMPXCHG writes + * back the original value and the access is atomic, but KVM's ABI is + * that all writes are dirty logged, regardless of the value written. + */ + kvm_vcpu_mark_page_dirty(vcpu, gpa_to_gfn(gpa)); + if (r) return X86EMUL_CMPXCHG_FAILED; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index a58a426e6b1c..684a39df60d9 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -314,7 +314,7 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT); } -static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) +void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) { struct kvm_lapic_irq irq = { }; int r; diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index 532a535a9e99..500d9593a5a3 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -16,6 +16,7 @@ extern struct static_key_false_deferred kvm_xen_enabled; int __kvm_xen_has_interrupt(struct kvm_vcpu *vcpu); void kvm_xen_inject_pending_events(struct kvm_vcpu *vcpu); +void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *vcpu); int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data); int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data); int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data); @@ -33,6 +34,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); +static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu) +{ + /* + * The local APIC is being enabled. If the per-vCPU upcall vector is + * set and the vCPU's evtchn_upcall_pending flag is set, inject the + * interrupt. + */ + if (static_branch_unlikely(&kvm_xen_enabled.key) && + vcpu->arch.xen.vcpu_info_cache.active && + vcpu->arch.xen.upcall_vector && __kvm_xen_has_interrupt(vcpu)) + kvm_xen_inject_vcpu_vector(vcpu); +} + static inline bool kvm_xen_msr_enabled(struct kvm *kvm) { return static_branch_unlikely(&kvm_xen_enabled.key) && @@ -98,6 +112,10 @@ static inline void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) { } +static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu) +{ +} + static inline bool kvm_xen_msr_enabled(struct kvm *kvm) { return false; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 1dbbad73192a..f20636510eb1 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -818,15 +818,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, show_opcodes(regs, loglvl); } -/* - * The (legacy) vsyscall page is the long page in the kernel portion - * of the address space that has user-accessible permissions. - */ -static bool is_vsyscall_vaddr(unsigned long vaddr) -{ - return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); -} - static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address, u32 pkey, int si_code) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index 968d7005f4a7..f50cc210a981 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; + bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - if (info->direct_gbpages) { + /* if this is already a gbpage, this portion is already mapped */ + if (pud_large(*pud)) + continue; + + /* Is using a gbpage allowed? */ + use_gbpage = info->direct_gbpages; + + /* Don't use gbpage if it maps more than the requested region. */ + /* at the begining: */ + use_gbpage &= ((addr & ~PUD_MASK) == 0); + /* ... or at the end: */ + use_gbpage &= ((next & ~PUD_MASK) == 0); + + /* Never overwrite existing mappings */ + use_gbpage &= !pud_present(*pud); + + if (use_gbpage) { pud_t pudval; - if (pud_present(*pud)) - continue; - - addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index 6993f026adec..42115ac079cf 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -3,6 +3,8 @@ #include #include +#include + #ifdef CONFIG_X86_64 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { @@ -15,6 +17,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) if (vaddr < TASK_SIZE_MAX + PAGE_SIZE) return false; + /* + * Reading from the vsyscall page may cause an unhandled fault in + * certain cases. Though it is at an address above TASK_SIZE_MAX, it is + * usually considered as a user space address. + */ + if (is_vsyscall_vaddr(vaddr)) + return false; + /* * Allow everything during early boot before 'x86_virt_bits' * is initialized. Needed for instruction decoding in early diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 3e93af083e03..d4957eefef26 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -513,6 +513,24 @@ void __init sme_early_init(void) */ if (sev_status & MSR_AMD64_SEV_ENABLED) ia32_disable(); + + /* + * Override init functions that scan the ROM region in SEV-SNP guests, + * as this memory is not pre-validated and would thus cause a crash. + */ + if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.pci.init_irq = x86_init_noop; + x86_init.resources.probe_roms = x86_init_noop; + + /* + * DMI setup behavior for SEV-SNP guests depends on + * efi_enabled(EFI_CONFIG_TABLES), which hasn't been + * parsed yet. snp_dmi_setup() will run after that + * parsing has happened. + */ + x86_init.resources.dmi_setup = snp_dmi_setup; + } } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index d94ebd8acdfd..06ccbd36e8dc 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -97,7 +97,6 @@ static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch"); static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; -static char sme_cmdline_off[] __initdata = "off"; static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) { @@ -305,7 +304,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * instrumentation or checking boot_cpu_data in the cc_platform_has() * function. */ - if (!sme_get_me_mask() || sev_status & MSR_AMD64_SEV_ENABLED) + if (!sme_get_me_mask() || + RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ENABLED) return; /* @@ -504,10 +504,9 @@ void __init sme_encrypt_kernel(struct boot_params *bp) void __init sme_enable(struct boot_params *bp) { - const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; + const char *cmdline_ptr, *cmdline_arg, *cmdline_on; unsigned int eax, ebx, ecx, edx; unsigned long feature_mask; - bool active_by_default; unsigned long me_mask; char buffer[16]; bool snp; @@ -543,11 +542,11 @@ void __init sme_enable(struct boot_params *bp) me_mask = 1UL << (ebx & 0x3f); /* Check the SEV MSR whether SEV or SME is enabled */ - sev_status = __rdmsr(MSR_AMD64_SEV); - feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; + RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV); + feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; /* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */ - if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + if (snp && !(msr & MSR_AMD64_SEV_SNP_ENABLED)) snp_abort(); /* Check if memory encryption is enabled */ @@ -573,7 +572,6 @@ void __init sme_enable(struct boot_params *bp) return; } else { /* SEV state cannot be controlled by a command line option */ - sme_me_mask = me_mask; goto out; } @@ -588,31 +586,17 @@ void __init sme_enable(struct boot_params *bp) asm ("lea sme_cmdline_on(%%rip), %0" : "=r" (cmdline_on) : "p" (sme_cmdline_on)); - asm ("lea sme_cmdline_off(%%rip), %0" - : "=r" (cmdline_off) - : "p" (sme_cmdline_off)); - - if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) - active_by_default = true; - else - active_by_default = false; cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | ((u64)bp->ext_cmd_line_ptr << 32)); - if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0) + if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 || + strncmp(buffer, cmdline_on, sizeof(buffer))) return; - if (!strncmp(buffer, cmdline_on, sizeof(buffer))) - sme_me_mask = me_mask; - else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) - sme_me_mask = 0; - else - sme_me_mask = active_by_default ? me_mask : 0; out: - if (sme_me_mask) { - physical_mask &= ~sme_me_mask; - cc_set_vendor(CC_VENDOR_AMD); - cc_set_mask(sme_me_mask); - } + RIP_REL_REF(sme_me_mask) = me_mask; + physical_mask &= ~me_mask; + cc_vendor = CC_VENDOR_AMD; + cc_set_mask(me_mask); } diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index aa39d678fe81..dae5c952735c 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -961,7 +961,7 @@ static int __init cmp_memblk(const void *a, const void *b) const struct numa_memblk *ma = *(const struct numa_memblk **)a; const struct numa_memblk *mb = *(const struct numa_memblk **)b; - return ma->start - mb->start; + return (ma->start > mb->start) - (ma->start < mb->start); } static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; @@ -971,14 +971,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; * @start: address to begin fill * @end: address to end fill * - * Find and extend numa_meminfo memblks to cover the @start-@end - * physical address range, such that the first memblk includes - * @start, the last memblk includes @end, and any gaps in between - * are filled. + * Find and extend numa_meminfo memblks to cover the physical + * address range @start-@end * * RETURNS: * 0 : Success - * NUMA_NO_MEMBLK : No memblk exists in @start-@end range + * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end */ int __init numa_fill_memblks(u64 start, u64 end) @@ -990,17 +988,14 @@ int __init numa_fill_memblks(u64 start, u64 end) /* * Create a list of pointers to numa_meminfo memblks that - * overlap start, end. Exclude (start == bi->end) since - * end addresses in both a CFMWS range and a memblk range - * are exclusive. - * - * This list of pointers is used to make in-place changes - * that fill out the numa_meminfo memblks. + * overlap start, end. The list is used to make in-place + * changes that fill out the numa_meminfo memblks. */ for (int i = 0; i < mi->nr_blks; i++) { struct numa_memblk *bi = &mi->blk[i]; - if (start < bi->end && end >= bi->start) { + if (memblock_addrs_overlap(start, end - start, bi->start, + bi->end - bi->start)) { blk[count] = &mi->blk[i]; count++; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b69aee6245e4..7913440c0fd4 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -432,7 +432,7 @@ static void emit_return(u8 **pprog, u8 *ip) u8 *prog = *pprog; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { - emit_jump(&prog, &__x86_return_thunk, ip); + emit_jump(&prog, x86_return_thunk, ip); } else { EMIT1(0xC3); /* ret */ if (IS_ENABLED(CONFIG_SLS)) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 2925074b9a58..9a5b101c4502 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -653,6 +653,14 @@ static void print_absolute_relocs(void) if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { continue; } + /* + * Do not perform relocations in .notes section; any + * values there are meant for pre-boot consumption (e.g. + * startup_xen). + */ + if (sec_applies->shdr.sh_type == SHT_NOTE) { + continue; + } sh_symtab = sec_symtab->symtab; sym_strtab = sec_symtab->link->strtab; for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 4b0d6fff88de..1fb9a1644d94 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -65,6 +65,8 @@ int xen_smp_intr_init(unsigned int cpu) char *resched_name, *callfunc_name, *debug_name; resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); + if (!resched_name) + goto fail_mem; per_cpu(xen_resched_irq, cpu).name = resched_name; rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, cpu, @@ -77,6 +79,8 @@ int xen_smp_intr_init(unsigned int cpu) per_cpu(xen_resched_irq, cpu).irq = rc; callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); + if (!callfunc_name) + goto fail_mem; per_cpu(xen_callfunc_irq, cpu).name = callfunc_name; rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, cpu, @@ -90,6 +94,9 @@ int xen_smp_intr_init(unsigned int cpu) if (!xen_fifo_events) { debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); + if (!debug_name) + goto fail_mem; + per_cpu(xen_debug_irq, cpu).name = debug_name; rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt, @@ -101,6 +108,9 @@ int xen_smp_intr_init(unsigned int cpu) } callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); + if (!callfunc_name) + goto fail_mem; + per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name; rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, cpu, @@ -114,6 +124,8 @@ int xen_smp_intr_init(unsigned int cpu) return 0; + fail_mem: + rc = -ENOMEM; fail: xen_smp_intr_free(cpu); return rc; diff --git a/arch/xtensa/include/asm/jump_label.h b/arch/xtensa/include/asm/jump_label.h index c812bf85021c..46c8596259d2 100644 --- a/arch/xtensa/include/asm/jump_label.h +++ b/arch/xtensa/include/asm/jump_label.h @@ -13,7 +13,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "_nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" @@ -38,7 +38,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, * make it reachable and wrap both into a no-transform block * to avoid any assembler interference with this. */ - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" ".begin no-transform\n\t" "_j %l[l_yes]\n\t" "2:\n\t" diff --git a/block/bio.c b/block/bio.c index 6c22dd7b6f27..3318e0022fdf 100644 --- a/block/bio.c +++ b/block/bio.c @@ -927,7 +927,7 @@ static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio, if ((addr1 | mask) != (addr2 | mask)) return false; - if (bv->bv_len + len > queue_max_segment_size(q)) + if (len > queue_max_segment_size(q) - bv->bv_len) return false; return __bio_try_merge_page(bio, page, len, offset, same_page); } @@ -1112,19 +1112,16 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty) struct folio_iter fi; bio_for_each_folio_all(fi, bio) { - struct page *page; - size_t done = 0; + size_t nr_pages; if (mark_dirty) { folio_lock(fi.folio); folio_mark_dirty(fi.folio); folio_unlock(fi.folio); } - page = folio_page(fi.folio, fi.offset / PAGE_SIZE); - do { - folio_put(fi.folio); - done += PAGE_SIZE; - } while (done < fi.length); + nr_pages = (fi.offset + fi.length - 1) / PAGE_SIZE - + fi.offset / PAGE_SIZE + 1; + folio_put_refs(fi.folio, nr_pages); } } EXPORT_SYMBOL_GPL(__bio_release_pages); diff --git a/block/blk-core.c b/block/blk-core.c index 6eaf2b0ad7cc..aefdf07bdc2c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -864,7 +864,16 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) */ blk_flush_plug(current->plug, false); - if (bio_queue_enter(bio)) + /* + * We need to be able to enter a frozen queue, similar to how + * timeouts also need to do that. If that is blocked, then we can + * have pending IO when a queue freeze is started, and then the + * wait for the freeze to finish will wait for polled requests to + * timeout as the poller is preventer from entering the queue and + * completing them. As long as we prevent new IO from being queued, + * that should be all that matters. + */ + if (!percpu_ref_tryget(&q->q_usage_counter)) return 0; if (queue_is_mq(q)) { ret = blk_mq_poll(q, cookie, iob, flags); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 7dd6a33e1d6a..e6557024e3da 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1337,6 +1337,13 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) lockdep_assert_held(&iocg->waitq.lock); + /* + * If the delay is set by another CPU, we may be in the past. No need to + * change anything if so. This avoids decay calculation underflow. + */ + if (time_before64(now->now, iocg->delay_at)) + return false; + /* calculate the current delay in effect - 1/2 every second */ tdelta = now->now - iocg->delay_at; if (iocg->delay) diff --git a/block/blk-map.c b/block/blk-map.c index 66da9e2b19ab..b337ae347bfa 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -203,12 +203,19 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, /* * success */ - if ((iov_iter_rw(iter) == WRITE && - (!map_data || !map_data->null_mapped)) || - (map_data && map_data->from_user)) { + if (iov_iter_rw(iter) == WRITE && + (!map_data || !map_data->null_mapped)) { ret = bio_copy_from_iter(bio, iter); if (ret) goto cleanup; + } else if (map_data && map_data->from_user) { + struct iov_iter iter2 = *iter; + + /* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */ + iter2.data_source = ITER_SOURCE; + ret = bio_copy_from_iter(bio, &iter2); + if (ret) + goto cleanup; } else { if (bmd->is_our_pages) zero_fill_bio(bio); diff --git a/block/blk-mq.c b/block/blk-mq.c index b3f99dda4530..e1b12f3d54bd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -675,6 +675,22 @@ out_queue_exit: } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); +static void blk_mq_finish_request(struct request *rq) +{ + struct request_queue *q = rq->q; + + if ((rq->rq_flags & RQF_ELVPRIV) && + q->elevator->type->ops.finish_request) { + q->elevator->type->ops.finish_request(rq); + /* + * For postflush request that may need to be + * completed twice, we should clear this flag + * to avoid double finish_request() on the rq. + */ + rq->rq_flags &= ~RQF_ELVPRIV; + } +} + static void __blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; @@ -701,9 +717,7 @@ void blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; - if ((rq->rq_flags & RQF_ELVPRIV) && - q->elevator->type->ops.finish_request) - q->elevator->type->ops.finish_request(rq); + blk_mq_finish_request(rq); if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) laptop_io_completion(q->disk->bdi); @@ -1020,6 +1034,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) if (blk_mq_need_time_stamp(rq)) __blk_mq_end_request_acct(rq, ktime_get_ns()); + blk_mq_finish_request(rq); + if (rq->end_io) { rq_qos_done(rq->q, rq); if (rq->end_io(rq, error) == RQ_END_IO_FREE) @@ -1074,6 +1090,8 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob) if (iob->need_ts) __blk_mq_end_request_acct(rq, now); + blk_mq_finish_request(rq); + rq_qos_done(rq->q, rq); /* @@ -1859,6 +1877,22 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, wait->flags &= ~WQ_FLAG_EXCLUSIVE; __add_wait_queue(wq, wait); + /* + * Add one explicit barrier since blk_mq_get_driver_tag() may + * not imply barrier in case of failure. + * + * Order adding us to wait queue and allocating driver tag. + * + * The pair is the one implied in sbitmap_queue_wake_up() which + * orders clearing sbitmap tag bits and waitqueue_active() in + * __sbitmap_queue_wake_up(), since waitqueue_active() is lockless + * + * Otherwise, re-order of adding wait queue and getting driver tag + * may cause __sbitmap_queue_wake_up() to wake up nothing because + * the waitqueue_active() may not observe us in wait queue. + */ + smp_mb(); + /* * It's possible that a tag was freed in the window between the * allocation failure and adding the hardware queue to the wait diff --git a/block/blk-settings.c b/block/blk-settings.c index bbca4ce77a2d..c702f408bbc0 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -680,6 +680,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->zone_write_granularity = max(t->zone_write_granularity, b->zone_write_granularity); t->zoned = max(t->zoned, b->zoned); + if (!t->zoned) { + t->zone_write_granularity = 0; + t->max_zone_append_sectors = 0; + } return ret; } EXPORT_SYMBOL(blk_stack_limits); diff --git a/block/ioctl.c b/block/ioctl.c index ebe4a2653622..47567ba1185a 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -20,8 +20,6 @@ static int blkpg_do_ioctl(struct block_device *bdev, struct blkpg_partition p; sector_t start, length; - if (disk->flags & GENHD_FL_NO_PART) - return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 55e26065c2e2..f10c2a0d18d4 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -622,9 +622,8 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx) struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; struct blk_mq_tags *tags = hctx->sched_tags; - unsigned int shift = tags->bitmap_tags.sb.shift; - dd->async_depth = max(1U, 3 * (1U << shift) / 4); + dd->async_depth = max(1UL, 3 * q->nr_requests / 4); sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth); } diff --git a/block/opal_proto.h b/block/opal_proto.h index 7152aa1f1a49..7f306b08a0fe 100644 --- a/block/opal_proto.h +++ b/block/opal_proto.h @@ -71,6 +71,7 @@ enum opal_response_token { #define SHORT_ATOM_BYTE 0xBF #define MEDIUM_ATOM_BYTE 0xDF #define LONG_ATOM_BYTE 0xE3 +#define EMPTY_ATOM_BYTE 0xFF #define OPAL_INVAL_PARAM 12 #define OPAL_MANUFACTURED_INACTIVE 0x08 diff --git a/block/partitions/core.c b/block/partitions/core.c index b8112f52d388..3927f4283f6b 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -453,6 +453,11 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start, goto out; } + if (disk->flags & GENHD_FL_NO_PART) { + ret = -EINVAL; + goto out; + } + if (partition_overlaps(disk, start, length, -1)) { ret = -EBUSY; goto out; diff --git a/block/sed-opal.c b/block/sed-opal.c index 9bdb833e5817..25e4ce452c1d 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -935,16 +935,20 @@ static int response_parse(const u8 *buf, size_t length, token_length = response_parse_medium(iter, pos); else if (pos[0] <= LONG_ATOM_BYTE) /* long atom */ token_length = response_parse_long(iter, pos); + else if (pos[0] == EMPTY_ATOM_BYTE) /* empty atom */ + token_length = 1; else /* TOKEN */ token_length = response_parse_token(iter, pos); if (token_length < 0) return token_length; + if (pos[0] != EMPTY_ATOM_BYTE) + num_entries++; + pos += token_length; total -= token_length; iter++; - num_entries++; } resp->num = num_entries; diff --git a/crypto/Kconfig b/crypto/Kconfig index d779667671b2..edf193aff23e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1285,10 +1285,11 @@ config CRYPTO_JITTERENTROPY A non-physical non-deterministic ("true") RNG (e.g., an entropy source compliant with NIST SP800-90B) intended to provide a seed to a - deterministic RNG (e.g. per NIST SP800-90C). + deterministic RNG (e.g., per NIST SP800-90C). This RNG does not perform any cryptographic whitening of the generated + random numbers. - See https://www.chronox.de/jent.html + See https://www.chronox.de/jent/ config CRYPTO_KDF800108_CTR tristate diff --git a/crypto/algapi.c b/crypto/algapi.c index 5dc9ccdd5a51..c73d1359b9d4 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -290,6 +290,7 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) } if (!strcmp(q->cra_driver_name, alg->cra_name) || + !strcmp(q->cra_driver_name, alg->cra_driver_name) || !strcmp(q->cra_name, alg->cra_driver_name)) goto err; } diff --git a/drivers/accessibility/speakup/synth.c b/drivers/accessibility/speakup/synth.c index eea2a2fa4f01..45f906103133 100644 --- a/drivers/accessibility/speakup/synth.c +++ b/drivers/accessibility/speakup/synth.c @@ -208,8 +208,10 @@ void spk_do_flush(void) wake_up_process(speakup_task); } -void synth_write(const char *buf, size_t count) +void synth_write(const char *_buf, size_t count) { + const unsigned char *buf = (const unsigned char *) _buf; + while (count--) synth_buffer_add(*buf++); synth_start(); diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 088db2356998..0a84d5afd37c 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -308,9 +308,10 @@ err: static void __exit extlog_exit(void) { mce_unregister_decode_chain(&extlog_mce_dec); - ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; - if (extlog_l1_addr) + if (extlog_l1_addr) { + ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; acpi_os_unmap_iomem(extlog_l1_addr, l1_size); + } if (elog_addr) acpi_os_unmap_iomem(elog_addr, elog_size); release_mem_region(elog_base, elog_size); diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index f7852fb75ab3..756ab8edde83 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -513,6 +513,15 @@ static const struct dmi_system_id video_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3350"), }, }, + { + .callback = video_set_report_key_events, + .driver_data = (void *)((uintptr_t)REPORT_BRIGHTNESS_KEY_EVENTS), + .ident = "COLORFUL X15 AT 23", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "COLORFUL"), + DMI_MATCH(DMI_PRODUCT_NAME, "X15 AT 23"), + }, + }, /* * Some machines change the brightness themselves when a brightness * hotkey gets pressed, despite us telling them not to. In this case diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 9952f3a792ba..dd808cf65c84 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -99,6 +99,20 @@ static inline bool is_hest_type_generic_v2(struct ghes *ghes) return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; } +/* + * A platform may describe one error source for the handling of synchronous + * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI + * or External Interrupt). On x86, the HEST notifications are always + * asynchronous, so only SEA on ARM is delivered as a synchronous + * notification. + */ +static inline bool is_hest_sync_notify(struct ghes *ghes) +{ + u8 notify_type = ghes->generic->notify.type; + + return notify_type == ACPI_HEST_NOTIFY_SEA; +} + /* * This driver isn't really modular, however for the time being, * continuing to use module_param is the easiest way to remain @@ -461,7 +475,7 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags) } static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, - int sev) + int sev, bool sync) { int flags = -1; int sec_sev = ghes_severity(gdata->error_severity); @@ -475,7 +489,7 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) flags = MF_SOFT_OFFLINE; if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) - flags = 0; + flags = sync ? MF_ACTION_REQUIRED : 0; if (flags != -1) return ghes_do_memory_failure(mem_err->physical_addr, flags); @@ -483,9 +497,11 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, return false; } -static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev) +static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, + int sev, bool sync) { struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); + int flags = sync ? MF_ACTION_REQUIRED : 0; bool queued = false; int sec_sev, i; char *p; @@ -510,7 +526,7 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s * and don't filter out 'corrected' error here. */ if (is_cache && has_pa) { - queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0); + queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags); p += err_info->length; continue; } @@ -631,6 +647,7 @@ static bool ghes_do_proc(struct ghes *ghes, const guid_t *fru_id = &guid_null; char *fru_text = ""; bool queued = false; + bool sync = is_hest_sync_notify(ghes); sev = ghes_severity(estatus->error_severity); apei_estatus_for_each_section(estatus, gdata) { @@ -648,13 +665,13 @@ static bool ghes_do_proc(struct ghes *ghes, ghes_edac_report_mem_error(sev, mem_err); arch_apei_report_mem_error(sev, mem_err); - queued = ghes_handle_memory_failure(gdata, sev); + queued = ghes_handle_memory_failure(gdata, sev, sync); } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { ghes_handle_aer(gdata); } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { - queued = ghes_handle_arm_hw_error(gdata, sev); + queued = ghes_handle_arm_hw_error(gdata, sev, sync); } else { void *err = acpi_hest_get_payload(gdata); diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index 12f330b0eac0..b57de78fbf14 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -183,7 +183,7 @@ static int __init slit_valid(struct acpi_table_slit *slit) int i, j; int d = slit->locality_count; for (i = 0; i < d; i++) { - for (j = 0; j < d; j++) { + for (j = 0; j < d; j++) { u8 val = slit->entry[d*i + j]; if (i == j) { if (val != LOCAL_DISTANCE) @@ -532,7 +532,7 @@ int __init acpi_numa_init(void) */ /* fake_pxm is the next unused PXM value after SRAT parsing */ - for (i = 0, fake_pxm = -1; i < MAX_NUMNODES - 1; i++) { + for (i = 0, fake_pxm = -1; i < MAX_NUMNODES; i++) { if (node_to_pxm_map[i] > fake_pxm) fake_pxm = node_to_pxm_map[i]; } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index fc5b5b2c9e81..6f613eef2887 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1431,6 +1431,8 @@ int acpi_processor_power_exit(struct acpi_processor *pr) acpi_processor_registered--; if (acpi_processor_registered == 0) cpuidle_unregister_driver(&acpi_idle_driver); + + kfree(dev); } pr->flags.power_setup_done = 0; diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 5ebeb0d7b6be..1c5c1a269fbe 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -543,6 +543,39 @@ static const struct dmi_system_id lg_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "17U70P"), }, }, + { + /* Infinity E15-5A165-BM */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GM5RG1E0009COM"), + }, + }, + { + /* Infinity E15-5A305-1M */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GM5RGEE0016COM"), + }, + }, + { + /* Lunnen Ground 15 / AMD Ryzen 5 5500U */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Lunnen"), + DMI_MATCH(DMI_BOARD_NAME, "LLL5DAW"), + }, + }, + { + /* Lunnen Ground 16 / AMD Ryzen 7 5800U */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Lunnen"), + DMI_MATCH(DMI_BOARD_NAME, "LL6FA"), + }, + }, + { + /* MAIBENBEN X577 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MAIBENBEN"), + DMI_MATCH(DMI_BOARD_NAME, "X577"), + }, + }, { } }; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 94154a849a3e..293cdf486fd8 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -315,18 +315,14 @@ static int acpi_scan_device_check(struct acpi_device *adev) * again). */ if (adev->handler) { - dev_warn(&adev->dev, "Already enumerated\n"); - return -EALREADY; + dev_dbg(&adev->dev, "Already enumerated\n"); + return 0; } error = acpi_bus_scan(adev->handle); if (error) { dev_warn(&adev->dev, "Namespace scan failure\n"); return error; } - if (!adev->handler) { - dev_warn(&adev->dev, "Enumeration failure\n"); - error = -ENODEV; - } } else { error = acpi_scan_device_not_present(adev); } diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d933ef6cc65a..55cd17a13e75 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -477,6 +477,16 @@ binder_enqueue_thread_work_ilocked(struct binder_thread *thread, { WARN_ON(!list_empty(&thread->waiting_thread_node)); binder_enqueue_work_ilocked(work, &thread->todo); + + /* (e)poll-based threads require an explicit wakeup signal when + * queuing their own work; they rely on these events to consume + * messages without I/O block. Without it, threads risk waiting + * indefinitely without handling the work. + */ + if (thread->looper & BINDER_LOOPER_STATE_POLL && + thread->pid == current->pid && !thread->process_todo) + wake_up_interruptible_sync(&thread->wait); + thread->process_todo = true; } diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 805645efb3cc..17119e8dc8c3 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -49,6 +49,7 @@ enum { enum board_ids { /* board IDs by feature in alphabetical order */ board_ahci, + board_ahci_43bit_dma, board_ahci_ign_iferr, board_ahci_low_power, board_ahci_no_debounce_delay, @@ -129,6 +130,13 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, + [board_ahci_43bit_dma] = { + AHCI_HFLAGS (AHCI_HFLAG_43BIT_ONLY), + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_ops, + }, [board_ahci_ign_iferr] = { AHCI_HFLAGS (AHCI_HFLAG_IGN_IRQ_IF_ERR), .flags = AHCI_FLAG_COMMON, @@ -597,14 +605,19 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */ { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ - /* Asmedia */ - { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci }, /* ASM1061 */ - { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci }, /* ASM1062 */ - { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ - { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ - { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ + /* ASMedia */ + { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci_43bit_dma }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci_43bit_dma }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */ + { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */ + { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci_43bit_dma }, /* ASM1061R */ + { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci_43bit_dma }, /* ASM1062R */ + { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci_43bit_dma }, /* ASM1062+JMB575 */ + { PCI_VDEVICE(ASMEDIA, 0x1062), board_ahci }, /* ASM1062A */ + { PCI_VDEVICE(ASMEDIA, 0x1064), board_ahci }, /* ASM1064 */ + { PCI_VDEVICE(ASMEDIA, 0x1164), board_ahci }, /* ASM1164 */ + { PCI_VDEVICE(ASMEDIA, 0x1165), board_ahci }, /* ASM1165 */ + { PCI_VDEVICE(ASMEDIA, 0x1166), board_ahci }, /* ASM1166 */ /* * Samsung SSDs found on some macbooks. NCQ times out if MSI is @@ -944,11 +957,20 @@ static int ahci_pci_device_resume(struct device *dev) #endif /* CONFIG_PM */ -static int ahci_configure_dma_masks(struct pci_dev *pdev, int using_dac) +static int ahci_configure_dma_masks(struct pci_dev *pdev, + struct ahci_host_priv *hpriv) { - const int dma_bits = using_dac ? 64 : 32; + int dma_bits; int rc; + if (hpriv->cap & HOST_CAP_64) { + dma_bits = 64; + if (hpriv->flags & AHCI_HFLAG_43BIT_ONLY) + dma_bits = 43; + } else { + dma_bits = 32; + } + /* * If the device fixup already set the dma_mask to some non-standard * value, don't extend it here. This happens on STA2X11, for example. @@ -1921,7 +1943,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ahci_gtf_filter_workaround(host); /* initialize adapter */ - rc = ahci_configure_dma_masks(pdev, hpriv->cap & HOST_CAP_64); + rc = ahci_configure_dma_masks(pdev, hpriv); if (rc) return rc; diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index ff8e6ae1c636..f9c5906a8afa 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -247,6 +247,7 @@ enum { AHCI_HFLAG_SUSPEND_PHYS = BIT(26), /* handle PHYs during suspend/resume */ AHCI_HFLAG_NO_SXS = BIT(28), /* SXS not supported */ + AHCI_HFLAG_43BIT_ONLY = BIT(29), /* 43bit DMA addr limit */ /* ap->flags bits */ diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c index cb24ecf36faf..50e07ea60e45 100644 --- a/drivers/ata/ahci_ceva.c +++ b/drivers/ata/ahci_ceva.c @@ -88,7 +88,6 @@ struct ceva_ahci_priv { u32 axicc; bool is_cci_enabled; int flags; - struct reset_control *rst; }; static unsigned int ceva_ahci_read_id(struct ata_device *dev, @@ -189,6 +188,60 @@ static struct scsi_host_template ahci_platform_sht = { AHCI_SHT(DRV_NAME), }; +static int ceva_ahci_platform_enable_resources(struct ahci_host_priv *hpriv) +{ + int rc, i; + + rc = ahci_platform_enable_regulators(hpriv); + if (rc) + return rc; + + rc = ahci_platform_enable_clks(hpriv); + if (rc) + goto disable_regulator; + + /* Assert the controller reset */ + rc = ahci_platform_assert_rsts(hpriv); + if (rc) + goto disable_clks; + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_init(hpriv->phys[i]); + if (rc) + goto disable_rsts; + } + + /* De-assert the controller reset */ + ahci_platform_deassert_rsts(hpriv); + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_power_on(hpriv->phys[i]); + if (rc) { + phy_exit(hpriv->phys[i]); + goto disable_phys; + } + } + + return 0; + +disable_rsts: + ahci_platform_deassert_rsts(hpriv); + +disable_phys: + while (--i >= 0) { + phy_power_off(hpriv->phys[i]); + phy_exit(hpriv->phys[i]); + } + +disable_clks: + ahci_platform_disable_clks(hpriv); + +disable_regulator: + ahci_platform_disable_regulators(hpriv); + + return rc; +} + static int ceva_ahci_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -203,47 +256,19 @@ static int ceva_ahci_probe(struct platform_device *pdev) return -ENOMEM; cevapriv->ahci_pdev = pdev; - - cevapriv->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, - NULL); - if (IS_ERR(cevapriv->rst)) - dev_err_probe(&pdev->dev, PTR_ERR(cevapriv->rst), - "failed to get reset\n"); - hpriv = ahci_platform_get_resources(pdev, 0); if (IS_ERR(hpriv)) return PTR_ERR(hpriv); - if (!cevapriv->rst) { - rc = ahci_platform_enable_resources(hpriv); - if (rc) - return rc; - } else { - int i; + hpriv->rsts = devm_reset_control_get_optional_exclusive(&pdev->dev, + NULL); + if (IS_ERR(hpriv->rsts)) + return dev_err_probe(&pdev->dev, PTR_ERR(hpriv->rsts), + "failed to get reset\n"); - rc = ahci_platform_enable_clks(hpriv); - if (rc) - return rc; - /* Assert the controller reset */ - reset_control_assert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_init(hpriv->phys[i]); - if (rc) - return rc; - } - - /* De-assert the controller reset */ - reset_control_deassert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_power_on(hpriv->phys[i]); - if (rc) { - phy_exit(hpriv->phys[i]); - return rc; - } - } - } + rc = ceva_ahci_platform_enable_resources(hpriv); + if (rc) + return rc; if (of_property_read_bool(np, "ceva,broken-gen2")) cevapriv->flags = CEVA_FLAG_BROKEN_GEN2; @@ -252,52 +277,60 @@ static int ceva_ahci_probe(struct platform_device *pdev) if (of_property_read_u8_array(np, "ceva,p0-cominit-params", (u8 *)&cevapriv->pp2c[0], 4) < 0) { dev_warn(dev, "ceva,p0-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-cominit-params", (u8 *)&cevapriv->pp2c[1], 4) < 0) { dev_warn(dev, "ceva,p1-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read OOB timing value for COMWAKE from device-tree*/ if (of_property_read_u8_array(np, "ceva,p0-comwake-params", (u8 *)&cevapriv->pp3c[0], 4) < 0) { dev_warn(dev, "ceva,p0-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-comwake-params", (u8 *)&cevapriv->pp3c[1], 4) < 0) { dev_warn(dev, "ceva,p1-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy BURST timing value from device-tree */ if (of_property_read_u8_array(np, "ceva,p0-burst-params", (u8 *)&cevapriv->pp4c[0], 4) < 0) { dev_warn(dev, "ceva,p0-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-burst-params", (u8 *)&cevapriv->pp4c[1], 4) < 0) { dev_warn(dev, "ceva,p1-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy RETRY interval timing value from device-tree */ if (of_property_read_u16_array(np, "ceva,p0-retry-params", (u16 *)&cevapriv->pp5c[0], 2) < 0) { dev_warn(dev, "ceva,p0-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u16_array(np, "ceva,p1-retry-params", (u16 *)&cevapriv->pp5c[1], 2) < 0) { dev_warn(dev, "ceva,p1-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* @@ -335,7 +368,7 @@ static int __maybe_unused ceva_ahci_resume(struct device *dev) struct ahci_host_priv *hpriv = host->private_data; int rc; - rc = ahci_platform_enable_resources(hpriv); + rc = ceva_ahci_platform_enable_resources(hpriv); if (rc) return rc; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index fa2fc1953fc2..f14e56a5cff6 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2005,6 +2005,10 @@ void ata_dev_power_set_active(struct ata_device *dev) struct ata_taskfile tf; unsigned int err_mask; + /* If the device is already sleeping, do nothing. */ + if (dev->flags & ATA_DFLAG_SLEEPING) + return; + /* * Issue READ VERIFY SECTORS command for 1 sector at lba=0 only * if supported by the device. diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 1eaaf01418ea..b8034d194078 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -711,8 +711,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) ehc->saved_ncq_enabled |= 1 << devno; /* If we are resuming, wake up the device */ - if (ap->pflags & ATA_PFLAG_RESUMING) + if (ap->pflags & ATA_PFLAG_RESUMING) { + dev->flags |= ATA_DFLAG_RESUMING; ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE; + } } } @@ -3089,6 +3091,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, return 0; err: + dev->flags &= ~ATA_DFLAG_RESUMING; *r_failed_dev = dev; return rc; } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a9da2f05e629..a09548630fc8 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4652,6 +4652,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) struct ata_link *link; struct ata_device *dev; unsigned long flags; + bool do_resume; int ret = 0; mutex_lock(&ap->scsi_scan_mutex); @@ -4673,7 +4674,15 @@ void ata_scsi_dev_rescan(struct work_struct *work) if (scsi_device_get(sdev)) continue; + do_resume = dev->flags & ATA_DFLAG_RESUMING; + spin_unlock_irqrestore(ap->lock, flags); + if (do_resume) { + ret = scsi_resume_device(sdev); + if (ret == -EWOULDBLOCK) + goto unlock; + dev->flags &= ~ATA_DFLAG_RESUMING; + } ret = scsi_rescan_device(sdev); scsi_device_put(sdev); spin_lock_irqsave(ap->lock, flags); diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 49cb4537344a..2daf50d4cd47 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -2930,6 +2930,8 @@ open_card_ubr0(struct idt77252_dev *card) vc->scq = alloc_scq(card, vc->class); if (!vc->scq) { printk("%s: can't get SCQ.\n", card->name); + kfree(card->vcs[0]); + card->vcs[0] = NULL; return -ENOMEM; } diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index eaa31e567d1e..5b59d133b6af 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -144,7 +144,7 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); -static int __init early_cpu_to_node(int cpu) +int __init early_cpu_to_node(int cpu) { return cpu_to_node_map[cpu]; } diff --git a/drivers/base/core.c b/drivers/base/core.c index af90bfb0cc3d..3078f44dc186 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -337,10 +337,12 @@ static bool device_is_ancestor(struct device *dev, struct device *target) return false; } +#define DL_MARKER_FLAGS (DL_FLAG_INFERRED | \ + DL_FLAG_CYCLE | \ + DL_FLAG_MANAGED) static inline bool device_link_flag_is_sync_state_only(u32 flags) { - return (flags & ~(DL_FLAG_INFERRED | DL_FLAG_CYCLE)) == - (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED); + return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY; } /** @@ -2054,9 +2056,14 @@ static int fw_devlink_create_devlink(struct device *con, /* * SYNC_STATE_ONLY device links don't block probing and supports cycles. - * So cycle detection isn't necessary and shouldn't be done. + * So, one might expect that cycle detection isn't necessary for them. + * However, if the device link was marked as SYNC_STATE_ONLY because + * it's part of a cycle, then we still need to do cycle detection. This + * is because the consumer and supplier might be part of multiple cycles + * and we need to detect all those cycles. */ - if (!(flags & DL_FLAG_SYNC_STATE_ONLY)) { + if (!device_link_flag_is_sync_state_only(flags) || + flags & DL_FLAG_CYCLE) { device_links_write_lock(); if (__fw_devlink_relax_cycles(con, sup_handle)) { __fwnode_link_cycle(link); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index dab70a65377c..31da94afe4f3 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -589,6 +589,12 @@ ssize_t __weak cpu_show_spec_rstack_overflow(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -602,6 +608,7 @@ static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); +static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -617,6 +624,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_retbleed.attr, &dev_attr_gather_data_sampling.attr, &dev_attr_spec_rstack_overflow.attr, + &dev_attr_reg_file_data_sampling.attr, NULL }; diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 56ceba469802..d238b47f74c3 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1052,7 +1052,7 @@ static int __init genpd_power_off_unused(void) return 0; } -late_initcall(genpd_power_off_unused); +late_initcall_sync(genpd_power_off_unused); #ifdef CONFIG_PM_SLEEP diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index c50139207794..9c5a5f4dba5a 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -579,7 +579,7 @@ bool dev_pm_skip_resume(struct device *dev) } /** - * device_resume_noirq - Execute a "noirq resume" callback for given device. + * __device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. @@ -587,7 +587,7 @@ bool dev_pm_skip_resume(struct device *dev) * The driver of @dev will not receive interrupts while this function is being * executed. */ -static int device_resume_noirq(struct device *dev, pm_message_t state, bool async) +static void __device_resume_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -655,7 +655,13 @@ Skip: Out: complete_all(&dev->power.completion); TRACE_RESUME(error); - return error; + + if (error) { + suspend_stats.failed_resume_noirq++; + dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); + } } static bool is_async(struct device *dev) @@ -668,27 +674,35 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) { reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(func, dev); + if (!is_async(dev)) + return false; + + get_device(dev); + + if (async_schedule_dev_nocall(func, dev)) return true; - } + + put_device(dev); return false; } static void async_resume_noirq(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; - int error; - - error = device_resume_noirq(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + struct device *dev = data; + __device_resume_noirq(dev, pm_transition, true); put_device(dev); } +static void device_resume_noirq(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume_noirq)) + return; + + __device_resume_noirq(dev, pm_transition, false); +} + static void dpm_noirq_resume_devices(pm_message_t state) { struct device *dev; @@ -698,14 +712,6 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; - /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. - */ - list_for_each_entry(dev, &dpm_noirq_list, power.entry) - dpm_async_fn(dev, async_resume_noirq); - while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); get_device(dev); @@ -713,17 +719,7 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; - - error = device_resume_noirq(dev, state, false); - if (error) { - suspend_stats.failed_resume_noirq++; - dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " noirq", error); - } - } + device_resume_noirq(dev); put_device(dev); @@ -751,14 +747,14 @@ void dpm_resume_noirq(pm_message_t state) } /** - * device_resume_early - Execute an "early resume" callback for given device. + * __device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ -static int device_resume_early(struct device *dev, pm_message_t state, bool async) +static void __device_resume_early(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -811,21 +807,31 @@ Out: pm_runtime_enable(dev); complete_all(&dev->power.completion); - return error; + + if (error) { + suspend_stats.failed_resume_early++; + dpm_save_failed_step(SUSPEND_RESUME_EARLY); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async early" : " early", error); + } } static void async_resume_early(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; - int error; - - error = device_resume_early(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + struct device *dev = data; + __device_resume_early(dev, pm_transition, true); put_device(dev); } +static void device_resume_early(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume_early)) + return; + + __device_resume_early(dev, pm_transition, false); +} + /** * dpm_resume_early - Execute "early resume" callbacks for all devices. * @state: PM transition of the system being carried out. @@ -839,14 +845,6 @@ void dpm_resume_early(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; - /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. - */ - list_for_each_entry(dev, &dpm_late_early_list, power.entry) - dpm_async_fn(dev, async_resume_early); - while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); get_device(dev); @@ -854,17 +852,7 @@ void dpm_resume_early(pm_message_t state) mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; - - error = device_resume_early(dev, state, false); - if (error) { - suspend_stats.failed_resume_early++; - dpm_save_failed_step(SUSPEND_RESUME_EARLY); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " early", error); - } - } + device_resume_early(dev); put_device(dev); @@ -888,12 +876,12 @@ void dpm_resume_start(pm_message_t state) EXPORT_SYMBOL_GPL(dpm_resume_start); /** - * device_resume - Execute "resume" callbacks for given device. + * __device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. */ -static int device_resume(struct device *dev, pm_message_t state, bool async) +static void __device_resume(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -975,20 +963,30 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) TRACE_RESUME(error); - return error; + if (error) { + suspend_stats.failed_resume++; + dpm_save_failed_step(SUSPEND_RESUME); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async" : "", error); + } } static void async_resume(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; - int error; + struct device *dev = data; - error = device_resume(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume(dev, pm_transition, true); put_device(dev); } +static void device_resume(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume)) + return; + + __device_resume(dev, pm_transition, false); +} + /** * dpm_resume - Execute "resume" callbacks for non-sysdev devices. * @state: PM transition of the system being carried out. @@ -1008,27 +1006,17 @@ void dpm_resume(pm_message_t state) pm_transition = state; async_error = 0; - list_for_each_entry(dev, &dpm_suspended_list, power.entry) - dpm_async_fn(dev, async_resume); - while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); + get_device(dev); - if (!is_async(dev)) { - int error; - mutex_unlock(&dpm_list_mtx); + mutex_unlock(&dpm_list_mtx); - error = device_resume(dev, state, false); - if (error) { - suspend_stats.failed_resume++; - dpm_save_failed_step(SUSPEND_RESUME); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, "", error); - } + device_resume(dev); + + mutex_lock(&dpm_list_mtx); - mutex_lock(&dpm_list_mtx); - } if (!list_empty(&dev->power.entry)) list_move_tail(&dev->power.entry, &dpm_prepared_list); @@ -1269,7 +1257,7 @@ Complete: static void async_suspend_noirq(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = __device_suspend_noirq(dev, pm_transition, true); @@ -1450,7 +1438,7 @@ Complete: static void async_suspend_late(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = __device_suspend_late(dev, pm_transition, true); @@ -1727,7 +1715,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) static void async_suspend(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = __device_suspend(dev, pm_transition, true); diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 72b7a92337b1..cd6e559648b2 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -120,7 +120,7 @@ static unsigned int read_magic_time(void) struct rtc_time time; unsigned int val; - if (mc146818_get_time(&time) < 0) { + if (mc146818_get_time(&time, 1000) < 0) { pr_err("Unable to read current time from RTC\n"); return 0; } diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index afd094dec5ca..ca0c092ba47f 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -362,8 +362,10 @@ void dev_pm_enable_wake_irq_complete(struct device *dev) return; if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED && - wirq->status & WAKE_IRQ_DEDICATED_REVERSE) + wirq->status & WAKE_IRQ_DEDICATED_REVERSE) { enable_irq(wirq->irq); + wirq->status |= WAKE_IRQ_DEDICATED_ENABLED; + } } /** diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 128722cf6c3c..827802e418dd 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -333,6 +333,7 @@ aoeblk_gdalloc(void *vp) struct gendisk *gd; mempool_t *mp; struct blk_mq_tag_set *set; + sector_t ssize; ulong flags; int late = 0; int err; @@ -395,7 +396,7 @@ aoeblk_gdalloc(void *vp) gd->minors = AOE_PARTITIONS; gd->fops = &aoe_bdops; gd->private_data = d; - set_capacity(gd, d->ssize); + ssize = d->ssize; snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", d->aoemajor, d->aoeminor); @@ -404,6 +405,8 @@ aoeblk_gdalloc(void *vp) spin_unlock_irqrestore(&d->lock, flags); + set_capacity(gd, ssize); + err = device_add_disk(NULL, gd, aoe_attr_groups); if (err) goto out_disk_cleanup; diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index d7317425be51..cc9077b588d7 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -419,13 +419,16 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu rcu_read_lock(); for_each_netdev_rcu(&init_net, ifp) { dev_hold(ifp); - if (!is_aoe_netif(ifp)) - goto cont; + if (!is_aoe_netif(ifp)) { + dev_put(ifp); + continue; + } skb = new_skb(sizeof *h + sizeof *ch); if (skb == NULL) { printk(KERN_INFO "aoe: skb alloc failure\n"); - goto cont; + dev_put(ifp); + continue; } skb_put(skb, sizeof *h + sizeof *ch); skb->dev = ifp; @@ -440,9 +443,6 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu h->major = cpu_to_be16(aoemajor); h->minor = aoeminor; h->cmd = AOECMD_CFG; - -cont: - dev_put(ifp); } rcu_read_unlock(); } diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 63773a90581d..1e66c7a188a1 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -64,6 +64,7 @@ tx(int id) __must_hold(&txlock) pr_warn("aoe: packet could not be sent on %s. %s\n", ifp ? ifp->name : "netif", "consider increasing tx_queue_len"); + dev_put(ifp); spin_lock_irq(&txlock); } return 0; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 8037aaefeb2e..5c4be8dda253 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -494,7 +494,7 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, struct iov_iter *iter, int msg_flags, int *sent) { int result; - struct msghdr msg; + struct msghdr msg = { }; unsigned int noreclaim_flag; if (unlikely(!sock)) { @@ -509,10 +509,6 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, noreclaim_flag = memalloc_noreclaim_save(); do { sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (send) @@ -2412,6 +2408,12 @@ static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) } dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST); + if (!dev_list) { + nlmsg_free(reply); + ret = -EMSGSIZE; + goto out; + } + if (index == -1) { ret = idr_for_each(&nbd_index_idr, &status_cb, reply); if (ret) { diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index afc92869cba4..f58ca9ce3503 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3453,14 +3453,15 @@ static bool rbd_lock_add_request(struct rbd_img_request *img_req) static void rbd_lock_del_request(struct rbd_img_request *img_req) { struct rbd_device *rbd_dev = img_req->rbd_dev; - bool need_wakeup; + bool need_wakeup = false; lockdep_assert_held(&rbd_dev->lock_rwsem); spin_lock(&rbd_dev->lock_lists_lock); - rbd_assert(!list_empty(&img_req->lock_item)); - list_del_init(&img_req->lock_item); - need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && - list_empty(&rbd_dev->running_list)); + if (!list_empty(&img_req->lock_item)) { + list_del_init(&img_req->lock_item); + need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && + list_empty(&rbd_dev->running_list)); + } spin_unlock(&rbd_dev->lock_lists_lock); if (need_wakeup) complete(&rbd_dev->releasing_wait); @@ -3843,14 +3844,19 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result) return; } - list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) { + while (!list_empty(&rbd_dev->acquiring_list)) { + img_req = list_first_entry(&rbd_dev->acquiring_list, + struct rbd_img_request, lock_item); mutex_lock(&img_req->state_mutex); rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK); + if (!result) + list_move_tail(&img_req->lock_item, + &rbd_dev->running_list); + else + list_del_init(&img_req->lock_item); rbd_img_schedule(img_req, result); mutex_unlock(&img_req->state_mutex); } - - list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list); } static bool locker_equal(const struct ceph_locker *lhs, diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 2cfed2e58d64..ad451224e663 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -587,6 +587,7 @@ static char *rnbd_srv_get_full_path(struct rnbd_srv_session *srv_sess, { char *full_path; char *a, *b; + int len; full_path = kmalloc(PATH_MAX, GFP_KERNEL); if (!full_path) @@ -598,19 +599,19 @@ static char *rnbd_srv_get_full_path(struct rnbd_srv_session *srv_sess, */ a = strnstr(dev_search_path, "%SESSNAME%", sizeof(dev_search_path)); if (a) { - int len = a - dev_search_path; + len = a - dev_search_path; len = snprintf(full_path, PATH_MAX, "%.*s/%s/%s", len, dev_search_path, srv_sess->sessname, dev_name); - if (len >= PATH_MAX) { - pr_err("Too long path: %s, %s, %s\n", - dev_search_path, srv_sess->sessname, dev_name); - kfree(full_path); - return ERR_PTR(-EINVAL); - } } else { - snprintf(full_path, PATH_MAX, "%s/%s", - dev_search_path, dev_name); + len = snprintf(full_path, PATH_MAX, "%s/%s", + dev_search_path, dev_name); + } + if (len >= PATH_MAX) { + pr_err("Too long path: %s, %s, %s\n", + dev_search_path, srv_sess->sessname, dev_name); + kfree(full_path); + return ERR_PTR(-EINVAL); } /* eliminitate duplicated slashes */ diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 3124837aa406..505026f0025c 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1206,14 +1206,15 @@ static int virtblk_freeze(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; + /* Ensure no requests in virtqueues before deleting vqs. */ + blk_mq_freeze_queue(vblk->disk->queue); + /* Ensure we don't receive any more interrupts */ virtio_reset_device(vdev); /* Make sure no work handler is accessing the device. */ flush_work(&vblk->config_work); - blk_mq_quiesce_queue(vblk->disk->queue); - vdev->config->del_vqs(vdev); kfree(vblk->vqs); @@ -1231,7 +1232,7 @@ static int virtblk_restore(struct virtio_device *vdev) virtio_device_ready(vdev); - blk_mq_unquiesce_queue(vblk->disk->queue); + blk_mq_unfreeze_queue(vblk->disk->queue); return 0; } #endif diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index c9064d34d830..0211f704a358 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -152,7 +152,7 @@ static int qca_send_patch_config_cmd(struct hci_dev *hdev) bt_dev_dbg(hdev, "QCA Patch config"); skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, sizeof(cmd), - cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT); + cmd, 0, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "Sending QCA Patch config failed (%d)", err); @@ -594,27 +594,48 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Firmware files to download are based on ROM version. * ROM version is derived from last two bytes of soc_ver. */ - rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); + if (soc_type == QCA_WCN3988) + rom_ver = ((soc_ver & 0x00000f00) >> 0x05) | (soc_ver & 0x0000000f); + else + rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); if (soc_type == QCA_WCN6750) qca_send_patch_config_cmd(hdev); /* Download rampatch file */ config.type = TLV_TYPE_PATCH; - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: snprintf(config.fwname, sizeof(config.fwname), "qca/crbtfw%02x.tlv", rom_ver); - } else if (soc_type == QCA_QCA6390) { + break; + case QCA_WCN3988: + snprintf(config.fwname, sizeof(config.fwname), + "qca/apbtfw%02x.tlv", rom_ver); + break; + case QCA_QCA6390: snprintf(config.fwname, sizeof(config.fwname), "qca/htbtfw%02x.tlv", rom_ver); - } else if (soc_type == QCA_WCN6750) { + break; + case QCA_WCN6750: /* Choose mbn file by default.If mbn file is not found * then choose tlv file */ config.type = ELF_TYPE_PATCH; snprintf(config.fwname, sizeof(config.fwname), "qca/msbtfw%02x.mbn", rom_ver); - } else { + break; + case QCA_WCN6855: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hpbtfw%02x.tlv", rom_ver); + break; + case QCA_WCN7850: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hmtbtfw%02x.tlv", rom_ver); + break; + default: snprintf(config.fwname, sizeof(config.fwname), "qca/rampatch_%08x.bin", soc_ver); } @@ -630,27 +651,48 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Download NVM configuration */ config.type = TLV_TYPE_NVM; - if (firmware_name) + if (firmware_name) { snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name); - else if (qca_is_wcn399x(soc_type)) { - if (ver.soc_id == QCA_WCN3991_SOC_ID) { + } else { + switch (soc_type) { + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { + snprintf(config.fwname, sizeof(config.fwname), + "qca/crnv%02xu.bin", rom_ver); + } else { + snprintf(config.fwname, sizeof(config.fwname), + "qca/crnv%02x.bin", rom_ver); + } + break; + case QCA_WCN3988: snprintf(config.fwname, sizeof(config.fwname), - "qca/crnv%02xu.bin", rom_ver); - } else { + "qca/apnv%02x.bin", rom_ver); + break; + case QCA_QCA6390: snprintf(config.fwname, sizeof(config.fwname), - "qca/crnv%02x.bin", rom_ver); + "qca/htnv%02x.bin", rom_ver); + break; + case QCA_WCN6750: + snprintf(config.fwname, sizeof(config.fwname), + "qca/msnv%02x.bin", rom_ver); + break; + case QCA_WCN6855: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hpnv%02x.bin", rom_ver); + break; + case QCA_WCN7850: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hmtnv%02x.bin", rom_ver); + break; + + default: + snprintf(config.fwname, sizeof(config.fwname), + "qca/nvm_%08x.bin", soc_ver); } } - else if (soc_type == QCA_QCA6390) - snprintf(config.fwname, sizeof(config.fwname), - "qca/htnv%02x.bin", rom_ver); - else if (soc_type == QCA_WCN6750) - snprintf(config.fwname, sizeof(config.fwname), - "qca/msnv%02x.bin", rom_ver); - else - snprintf(config.fwname, sizeof(config.fwname), - "qca/nvm_%08x.bin", soc_ver); err = qca_download_firmware(hdev, &config, soc_type, rom_ver); if (err < 0) { @@ -658,16 +700,25 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } - if (soc_type >= QCA_WCN3991) { + switch (soc_type) { + case QCA_WCN3991: + case QCA_QCA6390: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: err = qca_disable_soc_logging(hdev); if (err < 0) return err; + break; + default: + break; } /* WCN399x and WCN6750 supports the Microsoft vendor extension with 0xFD70 as the * VsMsftOpCode. */ switch (soc_type) { + case QCA_WCN3988: case QCA_WCN3990: case QCA_WCN3991: case QCA_WCN3998: @@ -685,11 +736,18 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } - if (soc_type == QCA_WCN3991 || soc_type == QCA_WCN6750) { + switch (soc_type) { + case QCA_WCN3991: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: /* get fw build info */ err = qca_read_fw_build_info(hdev); if (err < 0) return err; + break; + default: + break; } bt_dev_info(hdev, "QCA setup on UART is completed"); diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index 61e9a50e66ae..03bff5c0059d 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -142,11 +142,14 @@ enum qca_btsoc_type { QCA_INVALID = -1, QCA_AR3002, QCA_ROME, + QCA_WCN3988, QCA_WCN3990, QCA_WCN3998, QCA_WCN3991, QCA_QCA6390, QCA_WCN6750, + QCA_WCN6855, + QCA_WCN7850, }; #if IS_ENABLED(CONFIG_BT_QCA) @@ -159,16 +162,6 @@ int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver, enum qca_btsoc_type); int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); int qca_send_pre_shutdown_cmd(struct hci_dev *hdev); -static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) -{ - return soc_type == QCA_WCN3990 || soc_type == QCA_WCN3991 || - soc_type == QCA_WCN3998; -} -static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) -{ - return soc_type == QCA_WCN6750; -} - #else static inline int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr) @@ -196,16 +189,6 @@ static inline int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) return -EOPNOTSUPP; } -static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) -{ - return false; -} - -static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) -{ - return false; -} - static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) { return -EOPNOTSUPP; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 45dffd2cbc71..2acda547f4f3 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -7,6 +7,7 @@ * * Copyright (C) 2007 Texas Instruments, Inc. * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Acknowledgements: * This file is based on hci_ll.c, which was... @@ -606,9 +607,18 @@ static int qca_open(struct hci_uart *hu) if (hu->serdev) { qcadev = serdev_device_get_drvdata(hu->serdev); - if (qca_is_wcn399x(qcadev->btsoc_type) || - qca_is_wcn6750(qcadev->btsoc_type)) + switch (qcadev->btsoc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: hu->init_speed = qcadev->init_speed; + break; + + default: + break; + } if (qcadev->oper_speed) hu->oper_speed = qcadev->oper_speed; @@ -1314,11 +1324,20 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS)); /* Give the controller time to process the request */ - if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu))) + switch (qca_soc_type(hu)) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: usleep_range(1000, 10000); - else + break; + + default: msleep(300); + } return 0; } @@ -1391,12 +1410,20 @@ static unsigned int qca_get_speed(struct hci_uart *hu, static int qca_check_speeds(struct hci_uart *hu) { - if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu))) { + switch (qca_soc_type(hu)) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: if (!qca_get_speed(hu, QCA_INIT_SPEED) && !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; - } else { + break; + + default: if (!qca_get_speed(hu, QCA_INIT_SPEED) || !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; @@ -1425,13 +1452,29 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) /* Disable flow control for wcn3990 to deassert RTS while * changing the baudrate of chip and host. */ - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: hci_uart_set_flow_control(hu, true); + break; - if (soc_type == QCA_WCN3990) { + default: + break; + } + + switch (soc_type) { + case QCA_WCN3990: reinit_completion(&qca->drop_ev_comp); set_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); + break; + + default: + break; } qca_baudrate = qca_get_baudrate_value(speed); @@ -1443,11 +1486,23 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) host_set_baudrate(hu, speed); error: - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: hci_uart_set_flow_control(hu, false); + break; - if (soc_type == QCA_WCN3990) { + default: + break; + } + + switch (soc_type) { + case QCA_WCN3990: /* Wait for the controller to send the vendor event * for the baudrate change command. */ @@ -1459,6 +1514,10 @@ error: } clear_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); + break; + + default: + break; } } @@ -1620,12 +1679,20 @@ static int qca_regulator_init(struct hci_uart *hu) } } - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: /* Forcefully enable wcn399x to enter in to boot mode. */ host_set_baudrate(hu, 2400); ret = qca_send_power_pulse(hu, false); if (ret) return ret; + break; + + default: + break; } /* For wcn6750 need to enable gpio bt_en */ @@ -1642,10 +1709,18 @@ static int qca_regulator_init(struct hci_uart *hu) qca_set_speed(hu, QCA_INIT_SPEED); - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: ret = qca_send_power_pulse(hu, true); if (ret) return ret; + break; + + default: + break; } /* Now the device is in ready state to communicate with host. @@ -1679,10 +1754,18 @@ static int qca_power_on(struct hci_dev *hdev) if (!hu->serdev) return 0; - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: ret = qca_regulator_init(hu); - } else { + break; + + default: qcadev = serdev_device_get_drvdata(hu->serdev); if (qcadev->bt_en) { gpiod_set_value_cansleep(qcadev->bt_en, 1); @@ -1705,6 +1788,7 @@ static int qca_setup(struct hci_uart *hu) const char *firmware_name = qca_get_firmware_name(hu); int ret; struct qca_btsoc_version ver; + const char *soc_name; ret = qca_check_speeds(hu); if (ret) @@ -1719,9 +1803,30 @@ static int qca_setup(struct hci_uart *hu) */ set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - bt_dev_info(hdev, "setting up %s", - qca_is_wcn399x(soc_type) ? "wcn399x" : - (soc_type == QCA_WCN6750) ? "wcn6750" : "ROME/QCA6390"); + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + soc_name = "wcn399x"; + break; + + case QCA_WCN6750: + soc_name = "wcn6750"; + break; + + case QCA_WCN6855: + soc_name = "wcn6855"; + break; + + case QCA_WCN7850: + soc_name = "wcn7850"; + break; + + default: + soc_name = "ROME/QCA6390"; + } + bt_dev_info(hdev, "setting up %s", soc_name); qca->memdump_state = QCA_MEMDUMP_IDLE; @@ -1732,15 +1837,33 @@ retry: clear_bit(QCA_SSR_TRIGGERED, &qca->flags); - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) { - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: + + /* Set BDA quirk bit for reading BDA value from fwnode property + * only if that property exist in DT. + */ + if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); + } else { + bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); + } + hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); if (ret) goto out; - } else { + break; + + default: qca_set_speed(hu, QCA_INIT_SPEED); } @@ -1754,8 +1877,17 @@ retry: qca_baudrate = qca_get_baudrate_value(speed); } - if (!(qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type))) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: + break; + + default: /* Get QCA version information */ ret = qca_read_soc_version(hdev, &ver, soc_type); if (ret) @@ -1824,7 +1956,18 @@ static const struct hci_uart_proto qca_proto = { .dequeue = qca_dequeue, }; -static const struct qca_device_data qca_soc_data_wcn3990 = { +static const struct qca_device_data qca_soc_data_wcn3988 __maybe_unused = { + .soc_type = QCA_WCN3988, + .vregs = (struct qca_vreg []) { + { "vddio", 15000 }, + { "vddxo", 80000 }, + { "vddrf", 300000 }, + { "vddch0", 450000 }, + }, + .num_vregs = 4, +}; + +static const struct qca_device_data qca_soc_data_wcn3990 __maybe_unused = { .soc_type = QCA_WCN3990, .vregs = (struct qca_vreg []) { { "vddio", 15000 }, @@ -1835,7 +1978,7 @@ static const struct qca_device_data qca_soc_data_wcn3990 = { .num_vregs = 4, }; -static const struct qca_device_data qca_soc_data_wcn3991 = { +static const struct qca_device_data qca_soc_data_wcn3991 __maybe_unused = { .soc_type = QCA_WCN3991, .vregs = (struct qca_vreg []) { { "vddio", 15000 }, @@ -1847,7 +1990,7 @@ static const struct qca_device_data qca_soc_data_wcn3991 = { .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; -static const struct qca_device_data qca_soc_data_wcn3998 = { +static const struct qca_device_data qca_soc_data_wcn3998 __maybe_unused = { .soc_type = QCA_WCN3998, .vregs = (struct qca_vreg []) { { "vddio", 10000 }, @@ -1858,12 +2001,13 @@ static const struct qca_device_data qca_soc_data_wcn3998 = { .num_vregs = 4, }; -static const struct qca_device_data qca_soc_data_qca6390 = { +static const struct qca_device_data qca_soc_data_qca6390 __maybe_unused = { .soc_type = QCA_QCA6390, .num_vregs = 0, + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; -static const struct qca_device_data qca_soc_data_wcn6750 = { +static const struct qca_device_data qca_soc_data_wcn6750 __maybe_unused = { .soc_type = QCA_WCN6750, .vregs = (struct qca_vreg []) { { "vddio", 5000 }, @@ -1880,6 +2024,34 @@ static const struct qca_device_data qca_soc_data_wcn6750 = { .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; +static const struct qca_device_data qca_soc_data_wcn6855 = { + .soc_type = QCA_WCN6855, + .vregs = (struct qca_vreg []) { + { "vddio", 5000 }, + { "vddbtcxmx", 126000 }, + { "vddrfacmn", 12500 }, + { "vddrfa0p8", 102000 }, + { "vddrfa1p7", 302000 }, + { "vddrfa1p2", 257000 }, + }, + .num_vregs = 6, + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, +}; + +static const struct qca_device_data qca_soc_data_wcn7850 __maybe_unused = { + .soc_type = QCA_WCN7850, + .vregs = (struct qca_vreg []) { + { "vddio", 5000 }, + { "vddaon", 26000 }, + { "vdddig", 126000 }, + { "vddrfa0p8", 102000 }, + { "vddrfa1p2", 257000 }, + { "vddrfa1p9", 302000 }, + }, + .num_vregs = 6, + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, +}; + static void qca_power_shutdown(struct hci_uart *hu) { struct qca_serdev *qcadev; @@ -1905,11 +2077,18 @@ static void qca_power_shutdown(struct hci_uart *hu) qcadev = serdev_device_get_drvdata(hu->serdev); - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: host_set_baudrate(hu, 2400); qca_send_power_pulse(hu, false); qca_regulator_disable(qcadev); - } else if (soc_type == QCA_WCN6750) { + break; + + case QCA_WCN6750: + case QCA_WCN6855: gpiod_set_value_cansleep(qcadev->bt_en, 0); msleep(100); qca_regulator_disable(qcadev); @@ -1917,7 +2096,9 @@ static void qca_power_shutdown(struct hci_uart *hu) sw_ctrl_state = gpiod_get_value_cansleep(qcadev->sw_ctrl); bt_dev_dbg(hu->hdev, "SW_CTRL is %d", sw_ctrl_state); } - } else if (qcadev->bt_en) { + break; + + default: gpiod_set_value_cansleep(qcadev->bt_en, 0); } @@ -2042,10 +2223,19 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (!qcadev->oper_speed) BT_DBG("UART will pick default operating speed"); - if (data && - (qca_is_wcn399x(data->soc_type) || - qca_is_wcn6750(data->soc_type))) { + if (data) qcadev->btsoc_type = data->soc_type; + else + qcadev->btsoc_type = QCA_ROME; + + switch (qcadev->btsoc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: qcadev->bt_power = devm_kzalloc(&serdev->dev, sizeof(struct qca_power), GFP_KERNEL); @@ -2064,14 +2254,19 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR_OR_NULL(qcadev->bt_en) && data->soc_type == QCA_WCN6750) { + if (IS_ERR(qcadev->bt_en) && + (data->soc_type == QCA_WCN6750 || + data->soc_type == QCA_WCN6855)) { dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); power_ctrl_enabled = false; } qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", GPIOD_IN); - if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && data->soc_type == QCA_WCN6750) + if (IS_ERR(qcadev->sw_ctrl) && + (data->soc_type == QCA_WCN6750 || + data->soc_type == QCA_WCN6855 || + data->soc_type == QCA_WCN7850)) dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); @@ -2085,15 +2280,12 @@ static int qca_serdev_probe(struct serdev_device *serdev) BT_ERR("wcn3990 serdev registration failed"); return err; } - } else { - if (data) - qcadev->btsoc_type = data->soc_type; - else - qcadev->btsoc_type = QCA_ROME; + break; + default: qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR_OR_NULL(qcadev->bt_en)) { + if (IS_ERR(qcadev->bt_en)) { dev_warn(&serdev->dev, "failed to acquire enable gpio\n"); power_ctrl_enabled = false; } @@ -2146,12 +2338,24 @@ static void qca_serdev_remove(struct serdev_device *serdev) struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); struct qca_power *power = qcadev->bt_power; - if ((qca_is_wcn399x(qcadev->btsoc_type) || - qca_is_wcn6750(qcadev->btsoc_type)) && - power->vregs_on) - qca_power_shutdown(&qcadev->serdev_hu); - else if (qcadev->susclk) - clk_disable_unprepare(qcadev->susclk); + switch (qcadev->btsoc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: + if (power->vregs_on) { + qca_power_shutdown(&qcadev->serdev_hu); + break; + } + fallthrough; + + default: + if (qcadev->susclk) + clk_disable_unprepare(qcadev->susclk); + } hci_uart_unregister_device(&qcadev->serdev_hu); } @@ -2328,10 +2532,13 @@ static const struct of_device_id qca_bluetooth_of_match[] = { { .compatible = "qcom,qca6174-bt" }, { .compatible = "qcom,qca6390-bt", .data = &qca_soc_data_qca6390}, { .compatible = "qcom,qca9377-bt" }, + { .compatible = "qcom,wcn3988-bt", .data = &qca_soc_data_wcn3988}, { .compatible = "qcom,wcn3990-bt", .data = &qca_soc_data_wcn3990}, { .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991}, { .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998}, { .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750}, + { .compatible = "qcom,wcn6855-bt", .data = &qca_soc_data_wcn6855}, + { .compatible = "qcom,wcn7850-bt", .data = &qca_soc_data_wcn7850}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match); diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index 7bfe998f3514..bdc763390550 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -186,11 +186,12 @@ config SUNXI_RSB config TEGRA_ACONNECT tristate "Tegra ACONNECT Bus Driver" - depends on ARCH_TEGRA_210_SOC + depends on ARCH_TEGRA depends on OF && PM help Driver for the Tegra ACONNECT bus which is used to interface with - the devices inside the Audio Processing Engine (APE) for Tegra210. + the devices inside the Audio Processing Engine (APE) for + Tegra210 and later. config TEGRA_GMI tristate "Tegra Generic Memory Interface bus driver" diff --git a/drivers/bus/mhi/ep/main.c b/drivers/bus/mhi/ep/main.c index edd153dda40c..34e0ba6f52d0 100644 --- a/drivers/bus/mhi/ep/main.c +++ b/drivers/bus/mhi/ep/main.c @@ -71,45 +71,77 @@ err_unlock: static int mhi_ep_send_completion_event(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_ring *ring, struct mhi_ring_element *tre, u32 len, enum mhi_ev_ccs code) { - struct mhi_ring_element event = {}; + struct mhi_ring_element *event; + int ret; - event.ptr = cpu_to_le64(ring->rbase + ring->rd_offset * sizeof(*tre)); - event.dword[0] = MHI_TRE_EV_DWORD0(code, len); - event.dword[1] = MHI_TRE_EV_DWORD1(ring->ch_id, MHI_PKT_TYPE_TX_EVENT); + event = kzalloc(sizeof(struct mhi_ring_element), GFP_KERNEL); + if (!event) + return -ENOMEM; - return mhi_ep_send_event(mhi_cntrl, ring->er_index, &event, MHI_TRE_DATA_GET_BEI(tre)); + event->ptr = cpu_to_le64(ring->rbase + ring->rd_offset * sizeof(*tre)); + event->dword[0] = MHI_TRE_EV_DWORD0(code, len); + event->dword[1] = MHI_TRE_EV_DWORD1(ring->ch_id, MHI_PKT_TYPE_TX_EVENT); + + ret = mhi_ep_send_event(mhi_cntrl, ring->er_index, event, MHI_TRE_DATA_GET_BEI(tre)); + kfree(event); + + return ret; } int mhi_ep_send_state_change_event(struct mhi_ep_cntrl *mhi_cntrl, enum mhi_state state) { - struct mhi_ring_element event = {}; + struct mhi_ring_element *event; + int ret; - event.dword[0] = MHI_SC_EV_DWORD0(state); - event.dword[1] = MHI_SC_EV_DWORD1(MHI_PKT_TYPE_STATE_CHANGE_EVENT); + event = kzalloc(sizeof(struct mhi_ring_element), GFP_KERNEL); + if (!event) + return -ENOMEM; - return mhi_ep_send_event(mhi_cntrl, 0, &event, 0); + event->dword[0] = MHI_SC_EV_DWORD0(state); + event->dword[1] = MHI_SC_EV_DWORD1(MHI_PKT_TYPE_STATE_CHANGE_EVENT); + + ret = mhi_ep_send_event(mhi_cntrl, 0, event, 0); + kfree(event); + + return ret; } int mhi_ep_send_ee_event(struct mhi_ep_cntrl *mhi_cntrl, enum mhi_ee_type exec_env) { - struct mhi_ring_element event = {}; + struct mhi_ring_element *event; + int ret; - event.dword[0] = MHI_EE_EV_DWORD0(exec_env); - event.dword[1] = MHI_SC_EV_DWORD1(MHI_PKT_TYPE_EE_EVENT); + event = kzalloc(sizeof(struct mhi_ring_element), GFP_KERNEL); + if (!event) + return -ENOMEM; - return mhi_ep_send_event(mhi_cntrl, 0, &event, 0); + event->dword[0] = MHI_EE_EV_DWORD0(exec_env); + event->dword[1] = MHI_SC_EV_DWORD1(MHI_PKT_TYPE_EE_EVENT); + + ret = mhi_ep_send_event(mhi_cntrl, 0, event, 0); + kfree(event); + + return ret; } static int mhi_ep_send_cmd_comp_event(struct mhi_ep_cntrl *mhi_cntrl, enum mhi_ev_ccs code) { struct mhi_ep_ring *ring = &mhi_cntrl->mhi_cmd->ring; - struct mhi_ring_element event = {}; + struct mhi_ring_element *event; + int ret; - event.ptr = cpu_to_le64(ring->rbase + ring->rd_offset * sizeof(struct mhi_ring_element)); - event.dword[0] = MHI_CC_EV_DWORD0(code); - event.dword[1] = MHI_CC_EV_DWORD1(MHI_PKT_TYPE_CMD_COMPLETION_EVENT); + event = kzalloc(sizeof(struct mhi_ring_element), GFP_KERNEL); + if (!event) + return -ENOMEM; - return mhi_ep_send_event(mhi_cntrl, 0, &event, 0); + event->ptr = cpu_to_le64(ring->rbase + ring->rd_offset * sizeof(struct mhi_ring_element)); + event->dword[0] = MHI_CC_EV_DWORD0(code); + event->dword[1] = MHI_CC_EV_DWORD1(MHI_PKT_TYPE_CMD_COMPLETION_EVENT); + + ret = mhi_ep_send_event(mhi_cntrl, 0, event, 0); + kfree(event); + + return ret; } static int mhi_ep_process_cmd_ring(struct mhi_ep_ring *ring, struct mhi_ring_element *el) diff --git a/drivers/bus/mhi/host/main.c b/drivers/bus/mhi/host/main.c index 0c3a009ed9bb..8378c3319cd5 100644 --- a/drivers/bus/mhi/host/main.c +++ b/drivers/bus/mhi/host/main.c @@ -268,7 +268,8 @@ static void mhi_del_ring_element(struct mhi_controller *mhi_cntrl, static bool is_valid_ring_ptr(struct mhi_ring *ring, dma_addr_t addr) { - return addr >= ring->iommu_base && addr < ring->iommu_base + ring->len; + return addr >= ring->iommu_base && addr < ring->iommu_base + ring->len && + !(addr & (sizeof(struct mhi_ring_element) - 1)); } int mhi_destroy_device(struct device *dev, void *data) @@ -642,6 +643,8 @@ static int parse_xfer_event(struct mhi_controller *mhi_cntrl, mhi_del_ring_element(mhi_cntrl, tre_ring); local_rp = tre_ring->rp; + read_unlock_bh(&mhi_chan->lock); + /* notify client */ mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result); @@ -667,6 +670,8 @@ static int parse_xfer_event(struct mhi_controller *mhi_cntrl, kfree(buf_info->cb_buf); } } + + read_lock_bh(&mhi_chan->lock); } break; } /* CC_EOT */ @@ -1119,17 +1124,15 @@ static int mhi_queue(struct mhi_device *mhi_dev, struct mhi_buf_info *buf_info, if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))) return -EIO; - read_lock_irqsave(&mhi_cntrl->pm_lock, flags); - ret = mhi_is_ring_full(mhi_cntrl, tre_ring); - if (unlikely(ret)) { - ret = -EAGAIN; - goto exit_unlock; - } + if (unlikely(ret)) + return -EAGAIN; ret = mhi_gen_tre(mhi_cntrl, mhi_chan, buf_info, mflags); if (unlikely(ret)) - goto exit_unlock; + return ret; + + read_lock_irqsave(&mhi_cntrl->pm_lock, flags); /* Packet is queued, take a usage ref to exit M3 if necessary * for host->device buffer, balanced put is done on buffer completion @@ -1149,7 +1152,6 @@ static int mhi_queue(struct mhi_device *mhi_dev, struct mhi_buf_info *buf_info, if (dir == DMA_FROM_DEVICE) mhi_cntrl->runtime_put(mhi_cntrl); -exit_unlock: read_unlock_irqrestore(&mhi_cntrl->pm_lock, flags); return ret; @@ -1201,6 +1203,9 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, int eot, eob, chain, bei; int ret; + /* Protect accesses for reading and incrementing WP */ + write_lock_bh(&mhi_chan->lock); + buf_ring = &mhi_chan->buf_ring; tre_ring = &mhi_chan->tre_ring; @@ -1218,8 +1223,10 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, if (!info->pre_mapped) { ret = mhi_cntrl->map_single(mhi_cntrl, buf_info); - if (ret) + if (ret) { + write_unlock_bh(&mhi_chan->lock); return ret; + } } eob = !!(flags & MHI_EOB); @@ -1236,6 +1243,8 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, mhi_add_ring_element(mhi_cntrl, tre_ring); mhi_add_ring_element(mhi_cntrl, buf_ring); + write_unlock_bh(&mhi_chan->lock); + return 0; } diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index 5eb0fe73ddc4..79fc96c8d836 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -830,6 +830,12 @@ static void moxtet_remove(struct spi_device *spi) mutex_destroy(&moxtet->lock); } +static const struct spi_device_id moxtet_spi_ids[] = { + { "moxtet" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, moxtet_spi_ids); + static const struct of_device_id moxtet_dt_ids[] = { { .compatible = "cznic,moxtet" }, {}, @@ -841,6 +847,7 @@ static struct spi_driver moxtet_spi_driver = { .name = "moxtet", .of_match_table = moxtet_dt_ids, }, + .id_table = moxtet_spi_ids, .probe = moxtet_probe, .remove = moxtet_remove, }; diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 8f31f9d81030..c319f9937709 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -24,10 +24,13 @@ #include #include #include +#include #include #define RNG_MODULE_NAME "hw_random" +#define RNG_BUFFER_SIZE (SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES) + static struct hwrng *current_rng; /* the current rng has been explicitly chosen by user via sysfs */ static int cur_rng_set_by_user; @@ -59,7 +62,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, static size_t rng_buffer_size(void) { - return SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES; + return RNG_BUFFER_SIZE; } static void add_early_randomness(struct hwrng *rng) @@ -211,6 +214,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, static ssize_t rng_dev_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) { + u8 buffer[RNG_BUFFER_SIZE]; ssize_t ret = 0; int err = 0; int bytes_read, len; @@ -238,34 +242,37 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, if (bytes_read < 0) { err = bytes_read; goto out_unlock_reading; - } - data_avail = bytes_read; - } - - if (!data_avail) { - if (filp->f_flags & O_NONBLOCK) { + } else if (bytes_read == 0 && + (filp->f_flags & O_NONBLOCK)) { err = -EAGAIN; goto out_unlock_reading; } - } else { - len = data_avail; + + data_avail = bytes_read; + } + + len = data_avail; + if (len) { if (len > size) len = size; data_avail -= len; - if (copy_to_user(buf + ret, rng_buffer + data_avail, - len)) { + memcpy(buffer, rng_buffer + data_avail, len); + } + mutex_unlock(&reading_mutex); + put_rng(rng); + + if (len) { + if (copy_to_user(buf + ret, buffer, len)) { err = -EFAULT; - goto out_unlock_reading; + goto out; } size -= len; ret += len; } - mutex_unlock(&reading_mutex); - put_rng(rng); if (need_resched()) schedule_timeout_interruptible(1); @@ -276,6 +283,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, } } out: + memzero_explicit(buffer, sizeof(buffer)); return ret ? : err; out_unlock_reading: diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 2fcd155eb688..3bd5f7d24062 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -407,6 +407,9 @@ static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index) if (IS_ERR(hw)) return ERR_CAST(hw); + if (!hw) + return NULL; + return hw->core; } diff --git a/drivers/clk/hisilicon/clk-hi3519.c b/drivers/clk/hisilicon/clk-hi3519.c index ad0c7f350cf0..60d8a27a9082 100644 --- a/drivers/clk/hisilicon/clk-hi3519.c +++ b/drivers/clk/hisilicon/clk-hi3519.c @@ -130,7 +130,7 @@ static void hi3519_clk_unregister(struct platform_device *pdev) of_clk_del_provider(pdev->dev.of_node); hisi_clk_unregister_gate(hi3519_gate_clks, - ARRAY_SIZE(hi3519_mux_clks), + ARRAY_SIZE(hi3519_gate_clks), crg->clk_data); hisi_clk_unregister_mux(hi3519_mux_clks, ARRAY_SIZE(hi3519_mux_clks), diff --git a/drivers/clk/hisilicon/clk-hi3559a.c b/drivers/clk/hisilicon/clk-hi3559a.c index 9ea1a80acbe8..0272276550ff 100644 --- a/drivers/clk/hisilicon/clk-hi3559a.c +++ b/drivers/clk/hisilicon/clk-hi3559a.c @@ -491,7 +491,6 @@ static void hisi_clk_register_pll(struct hi3559av100_pll_clock *clks, clk = clk_register(NULL, &p_clk->hw); if (IS_ERR(clk)) { - devm_kfree(dev, p_clk); dev_err(dev, "%s: failed to register clock %s\n", __func__, clks[i].name); continue; diff --git a/drivers/clk/hisilicon/clk-hi3620.c b/drivers/clk/hisilicon/clk-hi3620.c index a3d04c7c3da8..eb9c139babc3 100644 --- a/drivers/clk/hisilicon/clk-hi3620.c +++ b/drivers/clk/hisilicon/clk-hi3620.c @@ -467,8 +467,10 @@ static void __init hi3620_mmc_clk_init(struct device_node *node) return; clk_data->clks = kcalloc(num, sizeof(*clk_data->clks), GFP_KERNEL); - if (!clk_data->clks) + if (!clk_data->clks) { + kfree(clk_data); return; + } for (i = 0; i < num; i++) { struct hisi_mmc_clock *mmc_clk = &hi3620_mmc_clks[i]; diff --git a/drivers/clk/imx/clk-imx8qxp.c b/drivers/clk/imx/clk-imx8qxp.c index 273de1f29307..1066ea16de62 100644 --- a/drivers/clk/imx/clk-imx8qxp.c +++ b/drivers/clk/imx/clk-imx8qxp.c @@ -67,6 +67,22 @@ static const char * const lcd_pxl_sels[] = { "lcd_pxl_bypass_div_clk", }; +static const char *const lvds0_sels[] = { + "clk_dummy", + "clk_dummy", + "clk_dummy", + "clk_dummy", + "mipi0_lvds_bypass_clk", +}; + +static const char *const lvds1_sels[] = { + "clk_dummy", + "clk_dummy", + "clk_dummy", + "clk_dummy", + "mipi1_lvds_bypass_clk", +}; + static const char * const mipi_sels[] = { "clk_dummy", "clk_dummy", @@ -201,9 +217,9 @@ static int imx8qxp_clk_probe(struct platform_device *pdev) /* MIPI-LVDS SS */ imx_clk_scu("mipi0_bypass_clk", IMX_SC_R_MIPI_0, IMX_SC_PM_CLK_BYPASS); imx_clk_scu("mipi0_pixel_clk", IMX_SC_R_MIPI_0, IMX_SC_PM_CLK_PER); - imx_clk_scu("mipi0_lvds_pixel_clk", IMX_SC_R_LVDS_0, IMX_SC_PM_CLK_MISC2); imx_clk_scu("mipi0_lvds_bypass_clk", IMX_SC_R_LVDS_0, IMX_SC_PM_CLK_BYPASS); - imx_clk_scu("mipi0_lvds_phy_clk", IMX_SC_R_LVDS_0, IMX_SC_PM_CLK_MISC3); + imx_clk_scu2("mipi0_lvds_pixel_clk", lvds0_sels, ARRAY_SIZE(lvds0_sels), IMX_SC_R_LVDS_0, IMX_SC_PM_CLK_MISC2); + imx_clk_scu2("mipi0_lvds_phy_clk", lvds0_sels, ARRAY_SIZE(lvds0_sels), IMX_SC_R_LVDS_0, IMX_SC_PM_CLK_MISC3); imx_clk_scu2("mipi0_dsi_tx_esc_clk", mipi_sels, ARRAY_SIZE(mipi_sels), IMX_SC_R_MIPI_0, IMX_SC_PM_CLK_MST_BUS); imx_clk_scu2("mipi0_dsi_rx_esc_clk", mipi_sels, ARRAY_SIZE(mipi_sels), IMX_SC_R_MIPI_0, IMX_SC_PM_CLK_SLV_BUS); imx_clk_scu2("mipi0_dsi_phy_clk", mipi_sels, ARRAY_SIZE(mipi_sels), IMX_SC_R_MIPI_0, IMX_SC_PM_CLK_PHY); @@ -213,9 +229,9 @@ static int imx8qxp_clk_probe(struct platform_device *pdev) imx_clk_scu("mipi1_bypass_clk", IMX_SC_R_MIPI_1, IMX_SC_PM_CLK_BYPASS); imx_clk_scu("mipi1_pixel_clk", IMX_SC_R_MIPI_1, IMX_SC_PM_CLK_PER); - imx_clk_scu("mipi1_lvds_pixel_clk", IMX_SC_R_LVDS_1, IMX_SC_PM_CLK_MISC2); imx_clk_scu("mipi1_lvds_bypass_clk", IMX_SC_R_LVDS_1, IMX_SC_PM_CLK_BYPASS); - imx_clk_scu("mipi1_lvds_phy_clk", IMX_SC_R_LVDS_1, IMX_SC_PM_CLK_MISC3); + imx_clk_scu2("mipi1_lvds_pixel_clk", lvds1_sels, ARRAY_SIZE(lvds1_sels), IMX_SC_R_LVDS_1, IMX_SC_PM_CLK_MISC2); + imx_clk_scu2("mipi1_lvds_phy_clk", lvds1_sels, ARRAY_SIZE(lvds1_sels), IMX_SC_R_LVDS_1, IMX_SC_PM_CLK_MISC3); imx_clk_scu2("mipi1_dsi_tx_esc_clk", mipi_sels, ARRAY_SIZE(mipi_sels), IMX_SC_R_MIPI_1, IMX_SC_PM_CLK_MST_BUS); imx_clk_scu2("mipi1_dsi_rx_esc_clk", mipi_sels, ARRAY_SIZE(mipi_sels), IMX_SC_R_MIPI_1, IMX_SC_PM_CLK_SLV_BUS); diff --git a/drivers/clk/meson/axg.c b/drivers/clk/meson/axg.c index 2ad3801398dc..7802dabb26f6 100644 --- a/drivers/clk/meson/axg.c +++ b/drivers/clk/meson/axg.c @@ -2144,7 +2144,9 @@ static struct clk_regmap *const axg_clk_regmaps[] = { &axg_vclk_input, &axg_vclk2_input, &axg_vclk_div, + &axg_vclk_div1, &axg_vclk2_div, + &axg_vclk2_div1, &axg_vclk_div2_en, &axg_vclk_div4_en, &axg_vclk_div6_en, diff --git a/drivers/clk/mmp/clk-of-pxa168.c b/drivers/clk/mmp/clk-of-pxa168.c index 130d1a723879..cb0ebbd82038 100644 --- a/drivers/clk/mmp/clk-of-pxa168.c +++ b/drivers/clk/mmp/clk-of-pxa168.c @@ -306,18 +306,21 @@ static void __init pxa168_clk_init(struct device_node *np) pxa_unit->mpmu_base = of_iomap(np, 0); if (!pxa_unit->mpmu_base) { pr_err("failed to map mpmu registers\n"); + kfree(pxa_unit); return; } pxa_unit->apmu_base = of_iomap(np, 1); if (!pxa_unit->apmu_base) { pr_err("failed to map apmu registers\n"); + kfree(pxa_unit); return; } pxa_unit->apbc_base = of_iomap(np, 2); if (!pxa_unit->apbc_base) { pr_err("failed to map apbc registers\n"); + kfree(pxa_unit); return; } diff --git a/drivers/clk/qcom/dispcc-sdm845.c b/drivers/clk/qcom/dispcc-sdm845.c index 735adfefc379..e792e0b130d3 100644 --- a/drivers/clk/qcom/dispcc-sdm845.c +++ b/drivers/clk/qcom/dispcc-sdm845.c @@ -759,6 +759,8 @@ static struct clk_branch disp_cc_mdss_vsync_clk = { static struct gdsc mdss_gdsc = { .gdscr = 0x3000, + .en_few_wait_val = 0x6, + .en_rest_wait_val = 0x5, .pd = { .name = "mdss_gdsc", }, diff --git a/drivers/clk/qcom/gcc-ipq6018.c b/drivers/clk/qcom/gcc-ipq6018.c index 4c5c7a8f41d0..b9844e41cf99 100644 --- a/drivers/clk/qcom/gcc-ipq6018.c +++ b/drivers/clk/qcom/gcc-ipq6018.c @@ -1557,6 +1557,7 @@ static struct clk_regmap_div nss_ubi0_div_clk_src = { static const struct freq_tbl ftbl_pcie_aux_clk_src[] = { F(24000000, P_XO, 1, 0, 0), + { } }; static const struct clk_parent_data gcc_xo_gpll0_core_pi_sleep_clk[] = { @@ -1737,6 +1738,7 @@ static const struct freq_tbl ftbl_sdcc_ice_core_clk_src[] = { F(160000000, P_GPLL0, 5, 0, 0), F(216000000, P_GPLL6, 5, 0, 0), F(308570000, P_GPLL6, 3.5, 0, 0), + { } }; static const struct clk_parent_data gcc_xo_gpll0_gpll6_gpll0_div2[] = { diff --git a/drivers/clk/qcom/gcc-ipq8074.c b/drivers/clk/qcom/gcc-ipq8074.c index b2e83b38976e..b52c923a2fbc 100644 --- a/drivers/clk/qcom/gcc-ipq8074.c +++ b/drivers/clk/qcom/gcc-ipq8074.c @@ -973,6 +973,7 @@ static struct clk_rcg2 pcie0_axi_clk_src = { static const struct freq_tbl ftbl_pcie_aux_clk_src[] = { F(19200000, P_XO, 1, 0, 0), + { } }; static struct clk_rcg2 pcie0_aux_clk_src = { @@ -1078,6 +1079,7 @@ static const struct freq_tbl ftbl_sdcc_ice_core_clk_src[] = { F(19200000, P_XO, 1, 0, 0), F(160000000, P_GPLL0, 5, 0, 0), F(308570000, P_GPLL6, 3.5, 0, 0), + { } }; static struct clk_rcg2 sdcc1_ice_core_clk_src = { diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c index 6af08e0ca847..ef15e8f11402 100644 --- a/drivers/clk/qcom/gcc-sdm845.c +++ b/drivers/clk/qcom/gcc-sdm845.c @@ -4038,3 +4038,4 @@ module_exit(gcc_sdm845_exit); MODULE_DESCRIPTION("QTI GCC SDM845 Driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:gcc-sdm845"); +MODULE_SOFTDEP("pre: rpmhpd"); diff --git a/drivers/clk/qcom/mmcc-apq8084.c b/drivers/clk/qcom/mmcc-apq8084.c index e9f971359155..5f373c10ec6e 100644 --- a/drivers/clk/qcom/mmcc-apq8084.c +++ b/drivers/clk/qcom/mmcc-apq8084.c @@ -334,6 +334,7 @@ static struct freq_tbl ftbl_mmss_axi_clk[] = { F(333430000, P_MMPLL1, 3.5, 0, 0), F(400000000, P_MMPLL0, 2, 0, 0), F(466800000, P_MMPLL1, 2.5, 0, 0), + { } }; static struct clk_rcg2 mmss_axi_clk_src = { @@ -358,6 +359,7 @@ static struct freq_tbl ftbl_ocmemnoc_clk[] = { F(150000000, P_GPLL0, 4, 0, 0), F(228570000, P_MMPLL0, 3.5, 0, 0), F(320000000, P_MMPLL0, 2.5, 0, 0), + { } }; static struct clk_rcg2 ocmemnoc_clk_src = { diff --git a/drivers/clk/qcom/mmcc-msm8974.c b/drivers/clk/qcom/mmcc-msm8974.c index 17ed52046170..eb2b0e2200d2 100644 --- a/drivers/clk/qcom/mmcc-msm8974.c +++ b/drivers/clk/qcom/mmcc-msm8974.c @@ -279,6 +279,7 @@ static struct freq_tbl ftbl_mmss_axi_clk[] = { F(291750000, P_MMPLL1, 4, 0, 0), F(400000000, P_MMPLL0, 2, 0, 0), F(466800000, P_MMPLL1, 2.5, 0, 0), + { } }; static struct clk_rcg2 mmss_axi_clk_src = { @@ -303,6 +304,7 @@ static struct freq_tbl ftbl_ocmemnoc_clk[] = { F(150000000, P_GPLL0, 4, 0, 0), F(291750000, P_MMPLL1, 4, 0, 0), F(400000000, P_MMPLL0, 2, 0, 0), + { } }; static struct clk_rcg2 ocmemnoc_clk_src = { diff --git a/drivers/clk/qcom/reset.c b/drivers/clk/qcom/reset.c index e45e32804d2c..d96c96a9089f 100644 --- a/drivers/clk/qcom/reset.c +++ b/drivers/clk/qcom/reset.c @@ -22,8 +22,8 @@ static int qcom_reset(struct reset_controller_dev *rcdev, unsigned long id) return 0; } -static int -qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id) +static int qcom_reset_set_assert(struct reset_controller_dev *rcdev, + unsigned long id, bool assert) { struct qcom_reset_controller *rst; const struct qcom_reset_map *map; @@ -33,21 +33,22 @@ qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id) map = &rst->reset_map[id]; mask = map->bitmask ? map->bitmask : BIT(map->bit); - return regmap_update_bits(rst->regmap, map->reg, mask, mask); + regmap_update_bits(rst->regmap, map->reg, mask, assert ? mask : 0); + + /* Read back the register to ensure write completion, ignore the value */ + regmap_read(rst->regmap, map->reg, &mask); + + return 0; } -static int -qcom_reset_deassert(struct reset_controller_dev *rcdev, unsigned long id) +static int qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id) { - struct qcom_reset_controller *rst; - const struct qcom_reset_map *map; - u32 mask; + return qcom_reset_set_assert(rcdev, id, true); +} - rst = to_qcom_reset_controller(rcdev); - map = &rst->reset_map[id]; - mask = map->bitmask ? map->bitmask : BIT(map->bit); - - return regmap_update_bits(rst->regmap, map->reg, mask, 0); +static int qcom_reset_deassert(struct reset_controller_dev *rcdev, unsigned long id) +{ + return qcom_reset_set_assert(rcdev, id, false); } const struct reset_control_ops qcom_reset_ops = { diff --git a/drivers/clk/renesas/r8a779f0-cpg-mssr.c b/drivers/clk/renesas/r8a779f0-cpg-mssr.c index 27b668def357..7a49b91c9371 100644 --- a/drivers/clk/renesas/r8a779f0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779f0-cpg-mssr.c @@ -159,7 +159,7 @@ static const struct mssr_mod_clk r8a779f0_mod_clks[] __initconst = { DEF_MOD("cmt1", 911, R8A779F0_CLK_R), DEF_MOD("cmt2", 912, R8A779F0_CLK_R), DEF_MOD("cmt3", 913, R8A779F0_CLK_R), - DEF_MOD("pfc0", 915, R8A779F0_CLK_CL16M), + DEF_MOD("pfc0", 915, R8A779F0_CLK_CPEX), DEF_MOD("tsc", 919, R8A779F0_CLK_CL16M), DEF_MOD("ufs", 1514, R8A779F0_CLK_S0D4_HSC), }; diff --git a/drivers/clk/renesas/r8a779g0-cpg-mssr.c b/drivers/clk/renesas/r8a779g0-cpg-mssr.c index d5b325e3c539..e4c616921e5e 100644 --- a/drivers/clk/renesas/r8a779g0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779g0-cpg-mssr.c @@ -22,7 +22,7 @@ enum clk_ids { /* Core Clock Outputs exported to DT */ - LAST_DT_CORE_CLK = R8A779G0_CLK_R, + LAST_DT_CORE_CLK = R8A779G0_CLK_CP, /* External Input Clocks */ CLK_EXTAL, @@ -139,6 +139,7 @@ static const struct cpg_core_clk r8a779g0_core_clks[] __initconst = { DEF_FIXED("svd2_vip", R8A779G0_CLK_SVD2_VIP, CLK_SV_VIP, 2, 1), DEF_FIXED("cbfusa", R8A779G0_CLK_CBFUSA, CLK_EXTAL, 2, 1), DEF_FIXED("cpex", R8A779G0_CLK_CPEX, CLK_EXTAL, 2, 1), + DEF_FIXED("cp", R8A779G0_CLK_CP, CLK_EXTAL, 2, 1), DEF_FIXED("viobus", R8A779G0_CLK_VIOBUS, CLK_VIO, 1, 1), DEF_FIXED("viobusd2", R8A779G0_CLK_VIOBUSD2, CLK_VIO, 2, 1), DEF_FIXED("vcbus", R8A779G0_CLK_VCBUS, CLK_VC, 1, 1), @@ -169,10 +170,17 @@ static const struct mssr_mod_clk r8a779g0_mod_clks[] __initconst = { DEF_MOD("i2c4", 522, R8A779G0_CLK_S0D6_PER), DEF_MOD("i2c5", 523, R8A779G0_CLK_S0D6_PER), DEF_MOD("wdt1:wdt0", 907, R8A779G0_CLK_R), - DEF_MOD("pfc0", 915, R8A779G0_CLK_CL16M), - DEF_MOD("pfc1", 916, R8A779G0_CLK_CL16M), - DEF_MOD("pfc2", 917, R8A779G0_CLK_CL16M), - DEF_MOD("pfc3", 918, R8A779G0_CLK_CL16M), + DEF_MOD("cmt0", 910, R8A779G0_CLK_R), + DEF_MOD("cmt1", 911, R8A779G0_CLK_R), + DEF_MOD("cmt2", 912, R8A779G0_CLK_R), + DEF_MOD("cmt3", 913, R8A779G0_CLK_R), + DEF_MOD("pfc0", 915, R8A779G0_CLK_CP), + DEF_MOD("pfc1", 916, R8A779G0_CLK_CP), + DEF_MOD("pfc2", 917, R8A779G0_CLK_CP), + DEF_MOD("pfc3", 918, R8A779G0_CLK_CP), + DEF_MOD("tsc", 919, R8A779G0_CLK_CL16M), + DEF_MOD("ssiu", 2926, R8A779G0_CLK_S0D6_PER), + DEF_MOD("ssi", 2927, R8A779G0_CLK_S0D6_PER), }; /* diff --git a/drivers/clk/samsung/clk-exynos850.c b/drivers/clk/samsung/clk-exynos850.c index 541761e96aeb..87e463ad4274 100644 --- a/drivers/clk/samsung/clk-exynos850.c +++ b/drivers/clk/samsung/clk-exynos850.c @@ -572,7 +572,7 @@ static const struct samsung_div_clock apm_div_clks[] __initconst = { static const struct samsung_gate_clock apm_gate_clks[] __initconst = { GATE(CLK_GOUT_CLKCMU_CMGP_BUS, "gout_clkcmu_cmgp_bus", "dout_apm_bus", - CLK_CON_GAT_CLKCMU_CMGP_BUS, 21, 0, 0), + CLK_CON_GAT_CLKCMU_CMGP_BUS, 21, CLK_SET_RATE_PARENT, 0), GATE(CLK_GOUT_CLKCMU_CHUB_BUS, "gout_clkcmu_chub_bus", "mout_clkcmu_chub_bus", CLK_CON_GAT_GATE_CLKCMU_CHUB_BUS, 21, 0, 0), @@ -936,19 +936,19 @@ static const struct samsung_fixed_rate_clock cmgp_fixed_clks[] __initconst = { static const struct samsung_mux_clock cmgp_mux_clks[] __initconst = { MUX(CLK_MOUT_CMGP_ADC, "mout_cmgp_adc", mout_cmgp_adc_p, CLK_CON_MUX_CLK_CMGP_ADC, 0, 1), - MUX(CLK_MOUT_CMGP_USI0, "mout_cmgp_usi0", mout_cmgp_usi0_p, - CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP0, 0, 1), - MUX(CLK_MOUT_CMGP_USI1, "mout_cmgp_usi1", mout_cmgp_usi1_p, - CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP1, 0, 1), + MUX_F(CLK_MOUT_CMGP_USI0, "mout_cmgp_usi0", mout_cmgp_usi0_p, + CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP0, 0, 1, CLK_SET_RATE_PARENT, 0), + MUX_F(CLK_MOUT_CMGP_USI1, "mout_cmgp_usi1", mout_cmgp_usi1_p, + CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP1, 0, 1, CLK_SET_RATE_PARENT, 0), }; static const struct samsung_div_clock cmgp_div_clks[] __initconst = { DIV(CLK_DOUT_CMGP_ADC, "dout_cmgp_adc", "gout_clkcmu_cmgp_bus", CLK_CON_DIV_DIV_CLK_CMGP_ADC, 0, 4), - DIV(CLK_DOUT_CMGP_USI0, "dout_cmgp_usi0", "mout_cmgp_usi0", - CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP0, 0, 5), - DIV(CLK_DOUT_CMGP_USI1, "dout_cmgp_usi1", "mout_cmgp_usi1", - CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP1, 0, 5), + DIV_F(CLK_DOUT_CMGP_USI0, "dout_cmgp_usi0", "mout_cmgp_usi0", + CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP0, 0, 5, CLK_SET_RATE_PARENT, 0), + DIV_F(CLK_DOUT_CMGP_USI1, "dout_cmgp_usi1", "mout_cmgp_usi1", + CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP1, 0, 5, CLK_SET_RATE_PARENT, 0), }; static const struct samsung_gate_clock cmgp_gate_clks[] __initconst = { @@ -963,12 +963,12 @@ static const struct samsung_gate_clock cmgp_gate_clks[] __initconst = { "gout_clkcmu_cmgp_bus", CLK_CON_GAT_GOUT_CMGP_GPIO_PCLK, 21, CLK_IGNORE_UNUSED, 0), GATE(CLK_GOUT_CMGP_USI0_IPCLK, "gout_cmgp_usi0_ipclk", "dout_cmgp_usi0", - CLK_CON_GAT_GOUT_CMGP_USI_CMGP0_IPCLK, 21, 0, 0), + CLK_CON_GAT_GOUT_CMGP_USI_CMGP0_IPCLK, 21, CLK_SET_RATE_PARENT, 0), GATE(CLK_GOUT_CMGP_USI0_PCLK, "gout_cmgp_usi0_pclk", "gout_clkcmu_cmgp_bus", CLK_CON_GAT_GOUT_CMGP_USI_CMGP0_PCLK, 21, 0, 0), GATE(CLK_GOUT_CMGP_USI1_IPCLK, "gout_cmgp_usi1_ipclk", "dout_cmgp_usi1", - CLK_CON_GAT_GOUT_CMGP_USI_CMGP1_IPCLK, 21, 0, 0), + CLK_CON_GAT_GOUT_CMGP_USI_CMGP1_IPCLK, 21, CLK_SET_RATE_PARENT, 0), GATE(CLK_GOUT_CMGP_USI1_PCLK, "gout_cmgp_usi1_pclk", "gout_clkcmu_cmgp_bus", CLK_CON_GAT_GOUT_CMGP_USI_CMGP1_PCLK, 21, 0, 0), @@ -1409,8 +1409,9 @@ static const struct samsung_mux_clock peri_mux_clks[] __initconst = { mout_peri_uart_user_p, PLL_CON0_MUX_CLKCMU_PERI_UART_USER, 4, 1), MUX(CLK_MOUT_PERI_HSI2C_USER, "mout_peri_hsi2c_user", mout_peri_hsi2c_user_p, PLL_CON0_MUX_CLKCMU_PERI_HSI2C_USER, 4, 1), - MUX(CLK_MOUT_PERI_SPI_USER, "mout_peri_spi_user", mout_peri_spi_user_p, - PLL_CON0_MUX_CLKCMU_PERI_SPI_USER, 4, 1), + MUX_F(CLK_MOUT_PERI_SPI_USER, "mout_peri_spi_user", + mout_peri_spi_user_p, PLL_CON0_MUX_CLKCMU_PERI_SPI_USER, 4, 1, + CLK_SET_RATE_PARENT, 0), }; static const struct samsung_div_clock peri_div_clks[] __initconst = { @@ -1420,8 +1421,8 @@ static const struct samsung_div_clock peri_div_clks[] __initconst = { CLK_CON_DIV_DIV_CLK_PERI_HSI2C_1, 0, 5), DIV(CLK_DOUT_PERI_HSI2C2, "dout_peri_hsi2c2", "gout_peri_hsi2c2", CLK_CON_DIV_DIV_CLK_PERI_HSI2C_2, 0, 5), - DIV(CLK_DOUT_PERI_SPI0, "dout_peri_spi0", "mout_peri_spi_user", - CLK_CON_DIV_DIV_CLK_PERI_SPI_0, 0, 5), + DIV_F(CLK_DOUT_PERI_SPI0, "dout_peri_spi0", "mout_peri_spi_user", + CLK_CON_DIV_DIV_CLK_PERI_SPI_0, 0, 5, CLK_SET_RATE_PARENT, 0), }; static const struct samsung_gate_clock peri_gate_clks[] __initconst = { @@ -1463,7 +1464,7 @@ static const struct samsung_gate_clock peri_gate_clks[] __initconst = { "mout_peri_bus_user", CLK_CON_GAT_GOUT_PERI_PWM_MOTOR_PCLK, 21, 0, 0), GATE(CLK_GOUT_SPI0_IPCLK, "gout_spi0_ipclk", "dout_peri_spi0", - CLK_CON_GAT_GOUT_PERI_SPI_0_IPCLK, 21, 0, 0), + CLK_CON_GAT_GOUT_PERI_SPI_0_IPCLK, 21, CLK_SET_RATE_PARENT, 0), GATE(CLK_GOUT_SPI0_PCLK, "gout_spi0_pclk", "mout_peri_bus_user", CLK_CON_GAT_GOUT_PERI_SPI_0_PCLK, 21, 0, 0), GATE(CLK_GOUT_SYSREG_PERI_PCLK, "gout_sysreg_peri_pclk", diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c index 422d78247553..dcacc5064d33 100644 --- a/drivers/clk/tegra/clk-tegra20.c +++ b/drivers/clk/tegra/clk-tegra20.c @@ -21,24 +21,24 @@ #define MISC_CLK_ENB 0x48 #define OSC_CTRL 0x50 -#define OSC_CTRL_OSC_FREQ_MASK (3<<30) -#define OSC_CTRL_OSC_FREQ_13MHZ (0<<30) -#define OSC_CTRL_OSC_FREQ_19_2MHZ (1<<30) -#define OSC_CTRL_OSC_FREQ_12MHZ (2<<30) -#define OSC_CTRL_OSC_FREQ_26MHZ (3<<30) -#define OSC_CTRL_MASK (0x3f2 | OSC_CTRL_OSC_FREQ_MASK) +#define OSC_CTRL_OSC_FREQ_MASK (3u<<30) +#define OSC_CTRL_OSC_FREQ_13MHZ (0u<<30) +#define OSC_CTRL_OSC_FREQ_19_2MHZ (1u<<30) +#define OSC_CTRL_OSC_FREQ_12MHZ (2u<<30) +#define OSC_CTRL_OSC_FREQ_26MHZ (3u<<30) +#define OSC_CTRL_MASK (0x3f2u | OSC_CTRL_OSC_FREQ_MASK) -#define OSC_CTRL_PLL_REF_DIV_MASK (3<<28) -#define OSC_CTRL_PLL_REF_DIV_1 (0<<28) -#define OSC_CTRL_PLL_REF_DIV_2 (1<<28) -#define OSC_CTRL_PLL_REF_DIV_4 (2<<28) +#define OSC_CTRL_PLL_REF_DIV_MASK (3u<<28) +#define OSC_CTRL_PLL_REF_DIV_1 (0u<<28) +#define OSC_CTRL_PLL_REF_DIV_2 (1u<<28) +#define OSC_CTRL_PLL_REF_DIV_4 (2u<<28) #define OSC_FREQ_DET 0x58 -#define OSC_FREQ_DET_TRIG (1<<31) +#define OSC_FREQ_DET_TRIG (1u<<31) #define OSC_FREQ_DET_STATUS 0x5c -#define OSC_FREQ_DET_BUSY (1<<31) -#define OSC_FREQ_DET_CNT_MASK 0xFFFF +#define OSC_FREQ_DET_BUSYu (1<<31) +#define OSC_FREQ_DET_CNT_MASK 0xFFFFu #define TEGRA20_CLK_PERIPH_BANKS 3 diff --git a/drivers/clk/zynq/clkc.c b/drivers/clk/zynq/clkc.c index 7bdeaff2bfd6..c28d3dacf0fb 100644 --- a/drivers/clk/zynq/clkc.c +++ b/drivers/clk/zynq/clkc.c @@ -42,6 +42,7 @@ static void __iomem *zynq_clkc_base; #define SLCR_SWDT_CLK_SEL (zynq_clkc_base + 0x204) #define NUM_MIO_PINS 54 +#define CLK_NAME_LEN 16 #define DBG_CLK_CTRL_CLKACT_TRC BIT(0) #define DBG_CLK_CTRL_CPU_1XCLKACT BIT(1) @@ -215,7 +216,7 @@ static void __init zynq_clk_setup(struct device_node *np) int i; u32 tmp; int ret; - char *clk_name; + char clk_name[CLK_NAME_LEN]; unsigned int fclk_enable = 0; const char *clk_output_name[clk_max]; const char *cpu_parents[4]; @@ -426,12 +427,10 @@ static void __init zynq_clk_setup(struct device_node *np) "gem1_emio_mux", CLK_SET_RATE_PARENT, SLCR_GEM1_CLK_CTRL, 0, 0, &gem1clk_lock); - tmp = strlen("mio_clk_00x"); - clk_name = kmalloc(tmp, GFP_KERNEL); for (i = 0; i < NUM_MIO_PINS; i++) { int idx; - snprintf(clk_name, tmp, "mio_clk_%2.2d", i); + snprintf(clk_name, CLK_NAME_LEN, "mio_clk_%2.2d", i); idx = of_property_match_string(np, "clock-names", clk_name); if (idx >= 0) can_mio_mux_parents[i] = of_clk_get_parent_name(np, @@ -439,7 +438,6 @@ static void __init zynq_clk_setup(struct device_node *np) else can_mio_mux_parents[i] = dummy_nm; } - kfree(clk_name); clk_register_mux(NULL, "can_mux", periph_parents, 4, CLK_SET_RATE_NO_REPARENT, SLCR_CAN_CLK_CTRL, 4, 2, 0, &canclk_lock); diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index 44a61dc6f932..e1c773bb5535 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -32,7 +32,7 @@ #define GT_CONTROL_IRQ_ENABLE BIT(2) /* banked */ #define GT_CONTROL_AUTO_INC BIT(3) /* banked */ #define GT_CONTROL_PRESCALER_SHIFT 8 -#define GT_CONTROL_PRESCALER_MAX 0xF +#define GT_CONTROL_PRESCALER_MAX 0xFF #define GT_CONTROL_PRESCALER_MASK (GT_CONTROL_PRESCALER_MAX << \ GT_CONTROL_PRESCALER_SHIFT) diff --git a/drivers/comedi/drivers/comedi_test.c b/drivers/comedi/drivers/comedi_test.c index 0b5c0af1cebf..626d53bf9146 100644 --- a/drivers/comedi/drivers/comedi_test.c +++ b/drivers/comedi/drivers/comedi_test.c @@ -85,6 +85,8 @@ struct waveform_private { struct comedi_device *dev; /* parent comedi device */ u64 ao_last_scan_time; /* time of previous AO scan in usec */ unsigned int ao_scan_period; /* AO scan period in usec */ + bool ai_timer_enable:1; /* should AI timer be running? */ + bool ao_timer_enable:1; /* should AO timer be running? */ unsigned short ao_loopbacks[N_CHANS]; }; @@ -234,8 +236,12 @@ static void waveform_ai_timer(struct timer_list *t) time_increment = devpriv->ai_convert_time - now; else time_increment = 1; - mod_timer(&devpriv->ai_timer, - jiffies + usecs_to_jiffies(time_increment)); + spin_lock(&dev->spinlock); + if (devpriv->ai_timer_enable) { + mod_timer(&devpriv->ai_timer, + jiffies + usecs_to_jiffies(time_increment)); + } + spin_unlock(&dev->spinlock); } overrun: @@ -391,9 +397,12 @@ static int waveform_ai_cmd(struct comedi_device *dev, * Seem to need an extra jiffy here, otherwise timer expires slightly * early! */ + spin_lock_bh(&dev->spinlock); + devpriv->ai_timer_enable = true; devpriv->ai_timer.expires = jiffies + usecs_to_jiffies(devpriv->ai_convert_period) + 1; add_timer(&devpriv->ai_timer); + spin_unlock_bh(&dev->spinlock); return 0; } @@ -402,6 +411,9 @@ static int waveform_ai_cancel(struct comedi_device *dev, { struct waveform_private *devpriv = dev->private; + spin_lock_bh(&dev->spinlock); + devpriv->ai_timer_enable = false; + spin_unlock_bh(&dev->spinlock); if (in_softirq()) { /* Assume we were called from the timer routine itself. */ del_timer(&devpriv->ai_timer); @@ -493,8 +505,12 @@ static void waveform_ao_timer(struct timer_list *t) unsigned int time_inc = devpriv->ao_last_scan_time + devpriv->ao_scan_period - now; - mod_timer(&devpriv->ao_timer, - jiffies + usecs_to_jiffies(time_inc)); + spin_lock(&dev->spinlock); + if (devpriv->ao_timer_enable) { + mod_timer(&devpriv->ao_timer, + jiffies + usecs_to_jiffies(time_inc)); + } + spin_unlock(&dev->spinlock); } underrun: @@ -515,9 +531,12 @@ static int waveform_ao_inttrig_start(struct comedi_device *dev, async->inttrig = NULL; devpriv->ao_last_scan_time = ktime_to_us(ktime_get()); + spin_lock_bh(&dev->spinlock); + devpriv->ao_timer_enable = true; devpriv->ao_timer.expires = jiffies + usecs_to_jiffies(devpriv->ao_scan_period); add_timer(&devpriv->ao_timer); + spin_unlock_bh(&dev->spinlock); return 1; } @@ -602,6 +621,9 @@ static int waveform_ao_cancel(struct comedi_device *dev, struct waveform_private *devpriv = dev->private; s->async->inttrig = NULL; + spin_lock_bh(&dev->spinlock); + devpriv->ao_timer_enable = false; + spin_unlock_bh(&dev->spinlock); if (in_softirq()) { /* Assume we were called from the timer routine itself. */ del_timer(&devpriv->ao_timer); diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index edc294ee5a5b..90dcf26f0973 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -320,7 +320,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (target_perf < capacity) des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); - min_perf = READ_ONCE(cpudata->highest_perf); + min_perf = READ_ONCE(cpudata->lowest_perf); if (_min_perf < capacity) min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index b74289a95a17..bea41ccabf1f 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -14,10 +14,8 @@ #include #include #include +#include #include -#include -#include -#include #include #include #include diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index f644c5e325fb..552db816ed22 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -481,8 +481,11 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv) static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - struct private_data *priv = policy->driver_data; + struct private_data *priv; + if (!policy) + return 0; + priv = policy->driver_data; cpufreq_cpu_put(policy); return brcm_avs_get_frequency(priv->base); diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 0f8c6bbc85fb..646ec616ae52 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -215,7 +215,7 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu) if (!priv) return -ENOMEM; - if (!alloc_cpumask_var(&priv->cpus, GFP_KERNEL)) + if (!zalloc_cpumask_var(&priv->cpus, GFP_KERNEL)) return -ENOMEM; cpumask_set_cpu(cpu, priv->cpus); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index fbe3a4098743..5771f3fc6115 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -493,6 +493,30 @@ static inline int intel_pstate_get_cppc_guaranteed(int cpu) } #endif /* CONFIG_ACPI_CPPC_LIB */ +static int intel_pstate_freq_to_hwp_rel(struct cpudata *cpu, int freq, + unsigned int relation) +{ + if (freq == cpu->pstate.turbo_freq) + return cpu->pstate.turbo_pstate; + + if (freq == cpu->pstate.max_freq) + return cpu->pstate.max_pstate; + + switch (relation) { + case CPUFREQ_RELATION_H: + return freq / cpu->pstate.scaling; + case CPUFREQ_RELATION_C: + return DIV_ROUND_CLOSEST(freq, cpu->pstate.scaling); + } + + return DIV_ROUND_UP(freq, cpu->pstate.scaling); +} + +static int intel_pstate_freq_to_hwp(struct cpudata *cpu, int freq) +{ + return intel_pstate_freq_to_hwp_rel(cpu, freq, CPUFREQ_RELATION_L); +} + /** * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels. * @cpu: Target CPU. @@ -510,6 +534,7 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling; int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu); int scaling = cpu->pstate.scaling; + int freq; pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys); pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo); @@ -523,16 +548,16 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling, perf_ctl_scaling); - cpu->pstate.max_pstate_physical = - DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling, - scaling); + freq = perf_ctl_max_phys * perf_ctl_scaling; + cpu->pstate.max_pstate_physical = intel_pstate_freq_to_hwp(cpu, freq); - cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling; + freq = cpu->pstate.min_pstate * perf_ctl_scaling; + cpu->pstate.min_freq = freq; /* * Cast the min P-state value retrieved via pstate_funcs.get_min() to * the effective range of HWP performance levels. */ - cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling); + cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq); } static inline void update_turbo_state(void) @@ -2493,13 +2518,12 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, * abstract values to represent performance rather than pure ratios. */ if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) { - int scaling = cpu->pstate.scaling; int freq; freq = max_policy_perf * perf_ctl_scaling; - max_policy_perf = DIV_ROUND_UP(freq, scaling); + max_policy_perf = intel_pstate_freq_to_hwp(cpu, freq); freq = min_policy_perf * perf_ctl_scaling; - min_policy_perf = DIV_ROUND_UP(freq, scaling); + min_policy_perf = intel_pstate_freq_to_hwp(cpu, freq); } pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n", @@ -2873,18 +2897,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, cpufreq_freq_transition_begin(policy, &freqs); - switch (relation) { - case CPUFREQ_RELATION_L: - target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling); - break; - case CPUFREQ_RELATION_H: - target_pstate = freqs.new / cpu->pstate.scaling; - break; - default: - target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling); - break; - } - + target_pstate = intel_pstate_freq_to_hwp_rel(cpu, freqs.new, relation); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, false); freqs.new = target_pstate * cpu->pstate.scaling; @@ -2902,7 +2915,7 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, update_turbo_state(); - target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); + target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true); @@ -2939,6 +2952,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, if (min_pstate < cpu->min_perf_ratio) min_pstate = cpu->min_perf_ratio; + if (min_pstate > cpu->max_perf_ratio) + min_pstate = cpu->max_perf_ratio; + max_pstate = min(cap_pstate, cpu->max_perf_ratio); if (max_pstate < min_pstate) max_pstate = min_pstate; diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index f0e0a35c7f21..7f326bb5fd8d 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -10,8 +10,10 @@ #include #include #include -#include +#include #include +#include +#include #include #define LUT_MAX_ENTRIES 32U @@ -295,7 +297,23 @@ static struct cpufreq_driver cpufreq_mtk_hw_driver = { static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) { const void *data; - int ret; + int ret, cpu; + struct device *cpu_dev; + struct regulator *cpu_reg; + + /* Make sure that all CPU supplies are available before proceeding. */ + for_each_possible_cpu(cpu) { + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return dev_err_probe(&pdev->dev, -EPROBE_DEFER, + "Failed to get cpu%d device\n", cpu); + + cpu_reg = devm_regulator_get(cpu_dev, "cpu"); + if (IS_ERR(cpu_reg)) + return dev_err_probe(&pdev->dev, PTR_ERR(cpu_reg), + "CPU%d regulator get failed\n", cpu); + } + data = of_device_get_match_data(&pdev->dev); if (!data) diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index e3313ce63b38..88afc49941b7 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index 4fba3637b115..6f0c32592416 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index a577586b23be..cb03bfb0435e 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index fd2c16821d54..ac719aca49b7 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index 1a63aeea8711..9c542e723a15 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index f64180dd2005..61ef653bcf56 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index d295f405c4bb..865e50164803 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index b8e02c3a1961..bbfb0f288dc3 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -515,10 +515,16 @@ EXPORT_SYMBOL_GPL(sev_platform_init); static int __sev_platform_shutdown_locked(int *error) { - struct sev_device *sev = psp_master->sev_data; + struct psp_device *psp = psp_master; + struct sev_device *sev; int ret; - if (!sev || sev->state == SEV_STATE_UNINIT) + if (!psp || !psp->sev_data) + return 0; + + sev = psp->sev_data; + + if (sev->state == SEV_STATE_UNINIT) return 0; ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c index 71e5f79431af..6e4a78e1f3ce 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c @@ -419,8 +419,8 @@ int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_mask, int pri, return 0; free_iq: - otx2_cpt_free_instruction_queues(lfs); cptlf_hw_cleanup(lfs); + otx2_cpt_free_instruction_queues(lfs); detach_rsrcs: otx2_cpt_detach_rsrcs_msg(lfs); clear_lfs_num: @@ -431,11 +431,13 @@ EXPORT_SYMBOL_NS_GPL(otx2_cptlf_init, CRYPTO_DEV_OCTEONTX2_CPT); void otx2_cptlf_shutdown(struct otx2_cptlfs_info *lfs) { - lfs->lfs_num = 0; /* Cleanup LFs hardware side */ cptlf_hw_cleanup(lfs); + /* Free instruction queues */ + otx2_cpt_free_instruction_queues(lfs); /* Send request to detach LFs */ otx2_cpt_detach_rsrcs_msg(lfs); + lfs->lfs_num = 0; } EXPORT_SYMBOL_NS_GPL(otx2_cptlf_shutdown, CRYPTO_DEV_OCTEONTX2_CPT); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c index 392e9fee05e8..6f3373f9928c 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c @@ -249,8 +249,11 @@ static void cptvf_lf_shutdown(struct otx2_cptlfs_info *lfs) otx2_cptlf_unregister_interrupts(lfs); /* Cleanup LFs software side */ lf_sw_cleanup(lfs); + /* Free instruction queues */ + otx2_cpt_free_instruction_queues(lfs); /* Send request to detach LFs */ otx2_cpt_detach_rsrcs_msg(lfs); + lfs->lfs_num = 0; } static int cptvf_lf_init(struct otx2_cptvf_dev *cptvf) diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index fe9bb2f3536a..4f36b5a9164a 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -95,18 +95,28 @@ static void adf_device_reset_worker(struct work_struct *work) if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) { /* The device hanged and we can't restart it so stop here */ dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); - kfree(reset_data); + if (reset_data->mode == ADF_DEV_RESET_ASYNC || + completion_done(&reset_data->compl)) + kfree(reset_data); WARN(1, "QAT: device restart failed. Device is unusable\n"); return; } adf_dev_restarted_notify(accel_dev); clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status); - /* The dev is back alive. Notify the caller if in sync mode */ - if (reset_data->mode == ADF_DEV_RESET_SYNC) - complete(&reset_data->compl); - else + /* + * The dev is back alive. Notify the caller if in sync mode + * + * If device restart will take a more time than expected, + * the schedule_reset() function can timeout and exit. This can be + * detected by calling the completion_done() function. In this case + * the reset_data structure needs to be freed here. + */ + if (reset_data->mode == ADF_DEV_RESET_ASYNC || + completion_done(&reset_data->compl)) kfree(reset_data); + else + complete(&reset_data->compl); } static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, @@ -139,8 +149,9 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, dev_err(&GET_DEV(accel_dev), "Reset device timeout expired\n"); ret = -EFAULT; + } else { + kfree(reset_data); } - kfree(reset_data); return ret; } return 0; diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c index 90a920e7f664..c439be1650c8 100644 --- a/drivers/crypto/stm32/stm32-crc32.c +++ b/drivers/crypto/stm32/stm32-crc32.c @@ -104,7 +104,7 @@ static struct stm32_crc *stm32_crc_get_next_crc(void) struct stm32_crc *crc; spin_lock_bh(&crc_list.lock); - crc = list_first_entry(&crc_list.dev_list, struct stm32_crc, list); + crc = list_first_entry_or_null(&crc_list.dev_list, struct stm32_crc, list); if (crc) list_move_tail(&crc->list, &crc_list.dev_list); spin_unlock_bh(&crc_list.lock); diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index 168195672e2e..d2df97cfcb29 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -104,7 +104,8 @@ static void virtio_crypto_dataq_akcipher_callback(struct virtio_crypto_request * } static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher_ctx *ctx, - struct virtio_crypto_ctrl_header *header, void *para, + struct virtio_crypto_ctrl_header *header, + struct virtio_crypto_akcipher_session_para *para, const uint8_t *key, unsigned int keylen) { struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3]; @@ -128,7 +129,7 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher ctrl = &vc_ctrl_req->ctrl; memcpy(&ctrl->header, header, sizeof(ctrl->header)); - memcpy(&ctrl->u, para, sizeof(ctrl->u)); + memcpy(&ctrl->u.akcipher_create_session.para, para, sizeof(*para)); input = &vc_ctrl_req->input; input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c index bf1f421e05f2..74bd3eb63734 100644 --- a/drivers/crypto/xilinx/zynqmp-aes-gcm.c +++ b/drivers/crypto/xilinx/zynqmp-aes-gcm.c @@ -231,7 +231,10 @@ static int zynqmp_handle_aes_req(struct crypto_engine *engine, err = zynqmp_aes_aead_cipher(areq); } + local_bh_disable(); crypto_finalize_aead_request(engine, areq, err); + local_bh_enable(); + return 0; } diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 003a44132418..5584af15300a 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -376,9 +376,9 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, allowed++; } - if (!allowed) { - cxl_set_mem_enable(cxlds, 0); - info->mem_enabled = 0; + if (!allowed && info->mem_enabled) { + dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n"); + return -ENXIO; } /* diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 2f7187dbfa2d..5b7d848a6f01 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -450,7 +450,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_region_params *p = &cxlr->params; struct resource *res; - u32 remainder = 0; + u64 remainder = 0; lockdep_assert_held_write(&cxl_region_rwsem); @@ -470,7 +470,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid))) return -ENXIO; - div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder); + div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder); if (remainder) return -EINVAL; diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 1f4219ea8579..259a44f230fc 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -461,10 +461,14 @@ static void devfreq_monitor(struct work_struct *work) if (err) dev_err(&devfreq->dev, "dvfs failed with (%d) error\n", err); + if (devfreq->stop_polling) + goto out; + queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); - mutex_unlock(&devfreq->lock); +out: + mutex_unlock(&devfreq->lock); trace_devfreq_monitor(devfreq); } @@ -482,6 +486,10 @@ void devfreq_monitor_start(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; + mutex_lock(&devfreq->lock); + if (delayed_work_pending(&devfreq->work)) + goto out; + switch (devfreq->profile->timer) { case DEVFREQ_TIMER_DEFERRABLE: INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor); @@ -490,12 +498,16 @@ void devfreq_monitor_start(struct devfreq *devfreq) INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor); break; default: - return; + goto out; } if (devfreq->profile->polling_ms) queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); + +out: + devfreq->stop_polling = false; + mutex_unlock(&devfreq->lock); } EXPORT_SYMBOL(devfreq_monitor_start); @@ -512,6 +524,14 @@ void devfreq_monitor_stop(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; + mutex_lock(&devfreq->lock); + if (devfreq->stop_polling) { + mutex_unlock(&devfreq->lock); + return; + } + + devfreq->stop_polling = true; + mutex_unlock(&devfreq->lock); cancel_delayed_work_sync(&devfreq->work); } EXPORT_SYMBOL(devfreq_monitor_stop); @@ -1687,7 +1707,7 @@ static ssize_t trans_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct devfreq *df = to_devfreq(dev); - ssize_t len; + ssize_t len = 0; int i, j; unsigned int max_state; @@ -1696,7 +1716,7 @@ static ssize_t trans_stat_show(struct device *dev, max_state = df->max_state; if (max_state == 0) - return sprintf(buf, "Not Supported.\n"); + return scnprintf(buf, PAGE_SIZE, "Not Supported.\n"); mutex_lock(&df->lock); if (!df->stop_polling && @@ -1706,31 +1726,52 @@ static ssize_t trans_stat_show(struct device *dev, } mutex_unlock(&df->lock); - len = sprintf(buf, " From : To\n"); - len += sprintf(buf + len, " :"); - for (i = 0; i < max_state; i++) - len += sprintf(buf + len, "%10lu", - df->freq_table[i]); + len += scnprintf(buf + len, PAGE_SIZE - len, " From : To\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, " :"); + for (i = 0; i < max_state; i++) { + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10lu", + df->freq_table[i]); + } + if (len >= PAGE_SIZE - 1) + return PAGE_SIZE - 1; - len += sprintf(buf + len, " time(ms)\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, " time(ms)\n"); for (i = 0; i < max_state; i++) { + if (len >= PAGE_SIZE - 1) + break; if (df->freq_table[i] == df->previous_freq) - len += sprintf(buf + len, "*"); + len += scnprintf(buf + len, PAGE_SIZE - len, "*"); else - len += sprintf(buf + len, " "); + len += scnprintf(buf + len, PAGE_SIZE - len, " "); + if (len >= PAGE_SIZE - 1) + break; - len += sprintf(buf + len, "%10lu:", df->freq_table[i]); - for (j = 0; j < max_state; j++) - len += sprintf(buf + len, "%10u", - df->stats.trans_table[(i * max_state) + j]); - - len += sprintf(buf + len, "%10llu\n", (u64) - jiffies64_to_msecs(df->stats.time_in_state[i])); + len += scnprintf(buf + len, PAGE_SIZE - len, "%10lu:", + df->freq_table[i]); + for (j = 0; j < max_state; j++) { + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10u", + df->stats.trans_table[(i * max_state) + j]); + } + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10llu\n", (u64) + jiffies64_to_msecs(df->stats.time_in_state[i])); + } + + if (len < PAGE_SIZE - 1) + len += scnprintf(buf + len, PAGE_SIZE - len, "Total transition : %u\n", + df->stats.total_trans); + + if (len >= PAGE_SIZE - 1) { + pr_warn_once("devfreq transition table exceeds PAGE_SIZE. Disabling\n"); + return -EFBIG; } - len += sprintf(buf + len, "Total transition : %u\n", - df->stats.total_trans); return len; } diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 81de833ccd04..66ef0a111484 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -665,16 +665,16 @@ config TEGRA20_APB_DMA config TEGRA210_ADMA tristate "NVIDIA Tegra210 ADMA support" - depends on (ARCH_TEGRA_210_SOC || COMPILE_TEST) + depends on (ARCH_TEGRA || COMPILE_TEST) select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help - Support for the NVIDIA Tegra210 ADMA controller driver. The - DMA controller has multiple DMA channels and is used to service - various audio clients in the Tegra210 audio processing engine - (APE). This DMA controller transfers data from memory to - peripheral and vice versa. It does not support memory to - memory data transfer. + Support for the NVIDIA Tegra210/Tegra186/Tegra194/Tegra234 ADMA + controller driver. The DMA controller has multiple DMA channels + and is used to service various audio clients in the Tegra210 + audio processing engine (APE). This DMA controller transfers + data from memory to peripheral and vice versa. It does not + support memory to memory data transfer. config TIMB_DMA tristate "Timberdale FPGA DMA support" diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c index 4cf8da77bdd9..cac4532fe23a 100644 --- a/drivers/dma/apple-admac.c +++ b/drivers/dma/apple-admac.c @@ -56,6 +56,8 @@ #define REG_BUS_WIDTH(ch) (0x8040 + (ch) * 0x200) +#define BUS_WIDTH_WORD_SIZE GENMASK(3, 0) +#define BUS_WIDTH_FRAME_SIZE GENMASK(7, 4) #define BUS_WIDTH_8BIT 0x00 #define BUS_WIDTH_16BIT 0x01 #define BUS_WIDTH_32BIT 0x02 @@ -739,7 +741,8 @@ static int admac_device_config(struct dma_chan *chan, struct admac_data *ad = adchan->host; bool is_tx = admac_chan_direction(adchan->no) == DMA_MEM_TO_DEV; int wordsize = 0; - u32 bus_width = 0; + u32 bus_width = readl_relaxed(ad->base + REG_BUS_WIDTH(adchan->no)) & + ~(BUS_WIDTH_WORD_SIZE | BUS_WIDTH_FRAME_SIZE); switch (is_tx ? config->dst_addr_width : config->src_addr_width) { case DMA_SLAVE_BUSWIDTH_1_BYTE: diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 8a6e6b60d66f..892e8389232e 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1103,6 +1103,9 @@ EXPORT_SYMBOL_GPL(dma_async_device_channel_register); static void __dma_async_device_channel_unregister(struct dma_device *device, struct dma_chan *chan) { + if (chan->local == NULL) + return; + WARN_ONCE(!device->device_release && chan->client_count, "%s called while %d clients hold a reference\n", __func__, chan->client_count); diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c index 8dd40d00a672..6b829d347417 100644 --- a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c +++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c @@ -38,15 +38,17 @@ static int dpaa2_qdma_alloc_chan_resources(struct dma_chan *chan) if (!dpaa2_chan->fd_pool) goto err; - dpaa2_chan->fl_pool = dma_pool_create("fl_pool", dev, - sizeof(struct dpaa2_fl_entry), - sizeof(struct dpaa2_fl_entry), 0); + dpaa2_chan->fl_pool = + dma_pool_create("fl_pool", dev, + sizeof(struct dpaa2_fl_entry) * 3, + sizeof(struct dpaa2_fl_entry), 0); + if (!dpaa2_chan->fl_pool) goto err_fd; dpaa2_chan->sdd_pool = dma_pool_create("sdd_pool", dev, - sizeof(struct dpaa2_qdma_sd_d), + sizeof(struct dpaa2_qdma_sd_d) * 2, sizeof(struct dpaa2_qdma_sd_d), 0); if (!dpaa2_chan->sdd_pool) goto err_fl; diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 045ead46ec8f..7082a5a6814a 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -109,6 +109,7 @@ #define FSL_QDMA_CMD_WTHROTL_OFFSET 20 #define FSL_QDMA_CMD_DSEN_OFFSET 19 #define FSL_QDMA_CMD_LWC_OFFSET 16 +#define FSL_QDMA_CMD_PF BIT(17) /* Field definition for Descriptor status */ #define QDMA_CCDF_STATUS_RTE BIT(5) @@ -384,7 +385,8 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, qdma_csgf_set_f(csgf_dest, len); /* Descriptor Buffer */ cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << - FSL_QDMA_CMD_RWTTYPE_OFFSET); + FSL_QDMA_CMD_RWTTYPE_OFFSET) | + FSL_QDMA_CMD_PF; sdf->data = QDMA_SDDF_CMD(cmd); cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << @@ -514,11 +516,11 @@ static struct fsl_qdma_queue queue_temp = queue_head + i + (j * queue_num); queue_temp->cq = - dma_alloc_coherent(&pdev->dev, - sizeof(struct fsl_qdma_format) * - queue_size[i], - &queue_temp->bus_addr, - GFP_KERNEL); + dmam_alloc_coherent(&pdev->dev, + sizeof(struct fsl_qdma_format) * + queue_size[i], + &queue_temp->bus_addr, + GFP_KERNEL); if (!queue_temp->cq) return NULL; queue_temp->block_base = fsl_qdma->block_base + @@ -563,11 +565,11 @@ static struct fsl_qdma_queue /* * Buffer for queue command */ - status_head->cq = dma_alloc_coherent(&pdev->dev, - sizeof(struct fsl_qdma_format) * - status_size, - &status_head->bus_addr, - GFP_KERNEL); + status_head->cq = dmam_alloc_coherent(&pdev->dev, + sizeof(struct fsl_qdma_format) * + status_size, + &status_head->bus_addr, + GFP_KERNEL); if (!status_head->cq) { devm_kfree(&pdev->dev, status_head); return NULL; @@ -805,7 +807,7 @@ fsl_qdma_irq_init(struct platform_device *pdev, int i; int cpu; int ret; - char irq_name[20]; + char irq_name[32]; fsl_qdma->error_irq = platform_get_irq_byname(pdev, "qdma-error"); @@ -1201,10 +1203,6 @@ static int fsl_qdma_probe(struct platform_device *pdev) if (!fsl_qdma->queue) return -ENOMEM; - ret = fsl_qdma_irq_init(pdev, fsl_qdma); - if (ret) - return ret; - fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0"); if (fsl_qdma->irq_base < 0) return fsl_qdma->irq_base; @@ -1243,19 +1241,22 @@ static int fsl_qdma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, fsl_qdma); - ret = dma_async_device_register(&fsl_qdma->dma_dev); - if (ret) { - dev_err(&pdev->dev, - "Can't register NXP Layerscape qDMA engine.\n"); - return ret; - } - ret = fsl_qdma_reg_init(fsl_qdma); if (ret) { dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); return ret; } + ret = fsl_qdma_irq_init(pdev, fsl_qdma); + if (ret) + return ret; + + ret = dma_async_device_register(&fsl_qdma->dma_dev); + if (ret) { + dev_err(&pdev->dev, "Can't register NXP Layerscape qDMA engine.\n"); + return ret; + } + return 0; } @@ -1272,8 +1273,6 @@ static void fsl_qdma_cleanup_vchan(struct dma_device *dmadev) static int fsl_qdma_remove(struct platform_device *pdev) { - int i; - struct fsl_qdma_queue *status; struct device_node *np = pdev->dev.of_node; struct fsl_qdma_engine *fsl_qdma = platform_get_drvdata(pdev); @@ -1282,11 +1281,6 @@ static int fsl_qdma_remove(struct platform_device *pdev) of_dma_controller_free(np); dma_async_device_unregister(&fsl_qdma->dma_dev); - for (i = 0; i < fsl_qdma->block_number; i++) { - status = fsl_qdma->status[i]; - dma_free_coherent(&pdev->dev, sizeof(struct fsl_qdma_format) * - status->n_cq, status->cq, status->bus_addr); - } return 0; } diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index e2070df6cad2..0b846c605d4b 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -584,11 +584,11 @@ desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc) } /** - * __cleanup - reclaim used descriptors + * __ioat_cleanup - reclaim used descriptors * @ioat_chan: channel (ring) to clean * @phys_complete: zeroed (or not) completion address (from status) */ -static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) +static void __ioat_cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) { struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; struct ioat_ring_ent *desc; @@ -675,7 +675,7 @@ static void ioat_cleanup(struct ioatdma_chan *ioat_chan) spin_lock_bh(&ioat_chan->cleanup_lock); if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); if (is_ioat_halted(*ioat_chan->completion)) { u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); @@ -712,7 +712,7 @@ static void ioat_restart_channel(struct ioatdma_chan *ioat_chan) ioat_quiesce(ioat_chan, 0); if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); __ioat_restart_chan(ioat_chan); } @@ -786,7 +786,7 @@ static void ioat_eh(struct ioatdma_chan *ioat_chan) /* cleanup so tail points to descriptor that caused the error */ if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); @@ -943,7 +943,7 @@ void ioat_timer_event(struct timer_list *t) /* timer restarted in ioat_cleanup_preamble * and IOAT_COMPLETION_ACK cleared */ - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); goto unlock_out; } diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c index 1aa65e5de0f3..f79240734807 100644 --- a/drivers/dma/ptdma/ptdma-dmaengine.c +++ b/drivers/dma/ptdma/ptdma-dmaengine.c @@ -385,8 +385,6 @@ int pt_dmaengine_register(struct pt_device *pt) chan->vc.desc_free = pt_do_cleanup; vchan_init(&chan->vc, dma_dev); - dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64)); - ret = dma_async_device_register(dma_dev); if (ret) goto err_reg; diff --git a/drivers/dma/sh/shdma.h b/drivers/dma/sh/shdma.h index 9c121a4b33ad..f97d80343aea 100644 --- a/drivers/dma/sh/shdma.h +++ b/drivers/dma/sh/shdma.h @@ -25,7 +25,7 @@ struct sh_dmae_chan { const struct sh_dmae_slave_config *config; /* Slave DMA configuration */ int xmit_shift; /* log_2(bytes_per_xfer) */ void __iomem *base; - char dev_id[16]; /* unique name per DMAC of channel */ + char dev_id[32]; /* unique name per DMAC of channel */ int pm_error; dma_addr_t slave_addr; }; diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 7ec6e5d728b0..9212ac9f978f 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -2413,6 +2413,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name, ecc); if (ret) { @@ -2429,6 +2434,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name, ecc); if (ret) { diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index b86b809eb1f7..82e7acfda6ed 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -3963,6 +3963,7 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, { struct udma_chan *uc = to_udma_chan(&vc->chan); struct udma_desc *d; + u8 status; if (!vd) return; @@ -3972,12 +3973,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, if (d->metadata_size) udma_fetch_epib(uc, d); - /* Provide residue information for the client */ if (result) { void *desc_vaddr = udma_curr_cppi5_desc_vaddr(d, d->desc_idx); if (cppi5_desc_get_type(desc_vaddr) == CPPI5_INFO0_DESC_TYPE_VAL_HOST) { + /* Provide residue information for the client */ result->residue = d->residue - cppi5_hdesc_get_pktlen(desc_vaddr); if (result->residue) @@ -3986,7 +3987,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, result->result = DMA_TRANS_NOERROR; } else { result->residue = 0; - result->result = DMA_TRANS_NOERROR; + /* Propagate TR Response errors to the client */ + status = d->hwdesc[0].tr_resp_base->status; + if (status) + result->result = DMA_TRANS_ABORTED; + else + result->result = DMA_TRANS_NOERROR; } } } diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 6ac5ff20a2fe..401a77e3b5fa 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -429,7 +429,23 @@ static void bm_work(struct work_struct *work) */ card->bm_generation = generation; - if (root_device == NULL) { + if (card->gap_count == 0) { + /* + * If self IDs have inconsistent gap counts, do a + * bus reset ASAP. The config rom read might never + * complete, so don't wait for it. However, still + * send a PHY configuration packet prior to the + * bus reset. The PHY configuration packet might + * fail, but 1394-2008 8.4.5.2 explicitly permits + * it in this case, so it should be safe to try. + */ + new_root_id = local_id; + /* + * We must always send a bus reset if the gap count + * is inconsistent, so bypass the 5-reset limit. + */ + card->bm_retries = 0; + } else if (root_device == NULL) { /* * Either link_on is false, or we failed to read the * config rom. In either case, pick another root. @@ -484,7 +500,19 @@ static void bm_work(struct work_struct *work) fw_notice(card, "phy config: new root=%x, gap_count=%d\n", new_root_id, gap_count); fw_send_phy_config(card, new_root_id, generation, gap_count); - reset_bus(card, true); + /* + * Where possible, use a short bus reset to minimize + * disruption to isochronous transfers. But in the event + * of a gap count inconsistency, use a long bus reset. + * + * As noted in 1394a 8.4.6.2, nodes on a mixed 1394/1394a bus + * may set different gap counts after a bus reset. On a mixed + * 1394/1394a bus, a short bus reset can get doubled. Some + * nodes may treat the double reset as one bus reset and others + * may treat it as two, causing a gap count inconsistency + * again. Using a long bus reset prevents this. + */ + reset_bus(card, card->gap_count != 0); /* Will allocate broadcast channel after the reset. */ goto out; } diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 74bab06283b7..1879ec27c023 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -100,10 +100,9 @@ static int textual_leaf_to_string(const u32 *block, char *buf, size_t size) * @buf: where to put the string * @size: size of @buf, in bytes * - * The string is taken from a minimal ASCII text descriptor leaf after - * the immediate entry with @key. The string is zero-terminated. - * An overlong string is silently truncated such that it and the - * zero byte fit into @size. + * The string is taken from a minimal ASCII text descriptor leaf just after the entry with the + * @key. The string is zero-terminated. An overlong string is silently truncated such that it + * and the zero byte fit into @size. * * Returns strlen(buf) or a negative error code. */ diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index a1c0154c31c6..4ac72754c255 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -244,6 +244,7 @@ void shmem_fetch_notification(struct scmi_shared_mem __iomem *shmem, void shmem_clear_channel(struct scmi_shared_mem __iomem *shmem); bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem, struct scmi_xfer *xfer); +bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem); /* declarations for message passing transports */ struct scmi_msg_payld; diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c index 25d31dfdad15..81c902672a30 100644 --- a/drivers/firmware/arm_scmi/mailbox.c +++ b/drivers/firmware/arm_scmi/mailbox.c @@ -43,6 +43,20 @@ static void rx_callback(struct mbox_client *cl, void *m) { struct scmi_mailbox *smbox = client_to_scmi_mailbox(cl); + /* + * An A2P IRQ is NOT valid when received while the platform still has + * the ownership of the channel, because the platform at first releases + * the SMT channel and then sends the completion interrupt. + * + * This addresses a possible race condition in which a spurious IRQ from + * a previous timed-out reply which arrived late could be wrongly + * associated with the next pending transaction. + */ + if (cl->knows_txdone && !shmem_channel_free(smbox->shmem)) { + dev_warn(smbox->cinfo->dev, "Ignoring spurious A2P IRQ !\n"); + return; + } + scmi_rx_callback(smbox->cinfo, shmem_read_header(smbox->shmem), NULL); } diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c index 87b4f4d35f06..517d52fb3bcb 100644 --- a/drivers/firmware/arm_scmi/shmem.c +++ b/drivers/firmware/arm_scmi/shmem.c @@ -122,3 +122,9 @@ bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem, (SCMI_SHMEM_CHAN_STAT_CHANNEL_ERROR | SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); } + +bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem) +{ + return (ioread32(&shmem->channel_status) & + SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); +} diff --git a/drivers/firmware/arm_scmi/smc.c b/drivers/firmware/arm_scmi/smc.c index ac0bd51ef16a..42ea308a2c1d 100644 --- a/drivers/firmware/arm_scmi/smc.c +++ b/drivers/firmware/arm_scmi/smc.c @@ -171,6 +171,13 @@ static int smc_chan_free(int id, void *p, void *data) struct scmi_chan_info *cinfo = p; struct scmi_smc *scmi_info = cinfo->transport_info; + /* + * Different protocols might share the same chan info, so a previous + * smc_chan_free call might have already freed the structure. + */ + if (!scmi_info) + return 0; + /* Ignore any possible further reception on the IRQ path */ if (scmi_info->irq > 0) free_irq(scmi_info->irq, scmi_info); diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 7c48c380d722..1995f0a2e0fc 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -107,7 +107,7 @@ static int __init arm_enable_runtime_services(void) efi_memory_desc_t *md; for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res; if (!(md->attribute & EFI_MEMORY_SP)) diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index 3e8d4b51a814..97bafb5f7038 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file) return -ENOMEM; } - cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); + cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL); if (!cap_info->phys) { kfree(cap_info->pages); kfree(cap_info); diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c index 2fd770b499a3..ff9791ce2e15 100644 --- a/drivers/firmware/efi/efi-init.c +++ b/drivers/firmware/efi/efi-init.c @@ -116,15 +116,6 @@ static __init int is_usable_memory(efi_memory_desc_t *md) case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: case EFI_PERSISTENT_MEMORY: - /* - * Special purpose memory is 'soft reserved', which means it - * is set aside initially, but can be hotplugged back in or - * be assigned to the dax driver after boot. - */ - if (efi_soft_reserve_enabled() && - (md->attribute & EFI_MEMORY_SP)) - return false; - /* * According to the spec, these regions are no longer reserved * after calling ExitBootServices(). However, we can only use @@ -169,6 +160,16 @@ static __init void reserve_regions(void) size = npages << PAGE_SHIFT; if (is_memory(md)) { + /* + * Special purpose memory is 'soft reserved', which + * means it is set aside initially. Don't add a memblock + * for it now so that it can be hotplugged back in or + * be assigned to the dax driver after boot. + */ + if (efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + continue; + early_init_dt_add_memory_arch(paddr, size); if (!is_usable_memory(md)) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index b7c0e8cc0764..28d4defc5d0c 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -185,8 +185,29 @@ static const struct attribute_group efi_subsys_attr_group = { static struct efivars generic_efivars; static struct efivar_operations generic_ops; +static bool generic_ops_supported(void) +{ + unsigned long name_size; + efi_status_t status; + efi_char16_t name; + efi_guid_t guid; + + name_size = sizeof(name); + + if (!efi.get_next_variable) + return false; + status = efi.get_next_variable(&name_size, &name, &guid); + if (status == EFI_UNSUPPORTED) + return false; + + return true; +} + static int generic_ops_register(void) { + if (!generic_ops_supported()) + return 0; + generic_ops.get_variable = efi.get_variable; generic_ops.get_next_variable = efi.get_next_variable; generic_ops.query_variable_store = efi_query_variable_store; @@ -200,6 +221,9 @@ static int generic_ops_register(void) static void generic_ops_unregister(void) { + if (!generic_ops.get_variable) + return; + efivars_unregister(&generic_efivars); } diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index ef5045a53ce0..473ef18421db 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -25,7 +25,7 @@ cflags-$(CONFIG_ARM) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ -fno-builtin -fpic \ $(call cc-option,-mno-single-pic-base) cflags-$(CONFIG_RISCV) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ - -fpic + -fpic -mno-relax cflags-$(CONFIG_LOONGARCH) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ -fpie @@ -84,6 +84,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM64) += arm64-stub.o smbios.o lib-$(CONFIG_X86) += x86-stub.o +lib-$(CONFIG_X86_64) += x86-5lvl.o lib-$(CONFIG_RISCV) += riscv-stub.o lib-$(CONFIG_LOONGARCH) += loongarch-stub.o diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c index 1de9878ddd3a..6b83c492c3b8 100644 --- a/drivers/firmware/efi/libstub/alignedmem.c +++ b/drivers/firmware/efi/libstub/alignedmem.c @@ -22,12 +22,15 @@ * Return: status code */ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, - unsigned long max, unsigned long align) + unsigned long max, unsigned long align, + int memory_type) { efi_physical_addr_t alloc_addr; efi_status_t status; int slack; + max = min(max, EFI_ALLOC_LIMIT); + if (align < EFI_ALLOC_ALIGN) align = EFI_ALLOC_ALIGN; @@ -36,7 +39,7 @@ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, slack = align / EFI_PAGE_SIZE - 1; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS, - EFI_LOADER_DATA, size / EFI_PAGE_SIZE + slack, + memory_type, size / EFI_PAGE_SIZE + slack, &alloc_addr); if (status != EFI_SUCCESS) return status; diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index e2f90566b291..16f15e36f9a7 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -180,7 +180,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, * locate the kernel at a randomized offset in physical memory. */ status = efi_random_alloc(*reserve_size, min_kimg_align, - reserve_addr, phys_seed); + reserve_addr, phys_seed, + EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT); if (status != EFI_SUCCESS) efi_warn("efi_random_alloc() failed: 0x%lx\n", status); } else { @@ -190,10 +191,11 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, if (status != EFI_SUCCESS) { if (!check_image_region((u64)_text, kernel_memsize)) { efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n"); - } else if (IS_ALIGNED((u64)_text, min_kimg_align)) { + } else if (IS_ALIGNED((u64)_text, min_kimg_align) && + (u64)_end < EFI_ALLOC_LIMIT) { /* * Just execute from wherever we were loaded by the - * UEFI PE/COFF loader if the alignment is suitable. + * UEFI PE/COFF loader if the placement is suitable. */ *image_addr = (u64)_text; *reserve_size = 0; @@ -201,7 +203,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } status = efi_allocate_pages_aligned(*reserve_size, reserve_addr, - ULONG_MAX, min_kimg_align); + ULONG_MAX, min_kimg_align, + EFI_LOADER_CODE); if (status != EFI_SUCCESS) { efi_err("Failed to relocate kernel\n"); diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 3d9b2469a0df..97744822dd95 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -216,6 +216,8 @@ efi_status_t efi_parse_options(char const *cmdline) efi_loglevel = CONSOLE_LOGLEVEL_QUIET; } else if (!strcmp(param, "noinitrd")) { efi_noinitrd = true; + } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { + efi_no5lvl = true; } else if (!strcmp(param, "efi") && val) { efi_nochunk = parse_option_str(val, "nochunk"); efi_novamap |= parse_option_str(val, "novamap"); diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 970e86e3aab0..6741f3d900c5 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -29,6 +29,11 @@ #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE #endif +#ifndef EFI_ALLOC_LIMIT +#define EFI_ALLOC_LIMIT ULONG_MAX +#endif + +extern bool efi_no5lvl; extern bool efi_nochunk; extern bool efi_nokaslr; extern int efi_loglevel; @@ -415,6 +420,26 @@ union efi_dxe_services_table { } mixed_mode; }; +typedef union efi_memory_attribute_protocol efi_memory_attribute_protocol_t; + +union efi_memory_attribute_protocol { + struct { + efi_status_t (__efiapi *get_memory_attributes)( + efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64 *); + + efi_status_t (__efiapi *set_memory_attributes)( + efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64); + + efi_status_t (__efiapi *clear_memory_attributes)( + efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64); + }; + struct { + u32 get_memory_attributes; + u32 set_memory_attributes; + u32 clear_memory_attributes; + } mixed_mode; +}; + typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t; union efi_uga_draw_protocol { @@ -880,7 +905,9 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); efi_status_t efi_random_alloc(unsigned long size, unsigned long align, - unsigned long *addr, unsigned long random_seed); + unsigned long *addr, unsigned long random_seed, + int memory_type, unsigned long alloc_min, + unsigned long alloc_max); efi_status_t efi_random_get_seed(void); @@ -907,7 +934,8 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, unsigned long max); efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, - unsigned long max, unsigned long align); + unsigned long max, unsigned long align, + int memory_type); efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, unsigned long *addr, unsigned long min); diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c index 45841ef55a9f..4f1fa302234d 100644 --- a/drivers/firmware/efi/libstub/mem.c +++ b/drivers/firmware/efi/libstub/mem.c @@ -89,9 +89,12 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, efi_physical_addr_t alloc_addr; efi_status_t status; + max = min(max, EFI_ALLOC_LIMIT); + if (EFI_ALLOC_ALIGN > EFI_PAGE_SIZE) return efi_allocate_pages_aligned(size, addr, max, - EFI_ALLOC_ALIGN); + EFI_ALLOC_ALIGN, + EFI_LOADER_DATA); alloc_addr = ALIGN_DOWN(max + 1, EFI_ALLOC_ALIGN) - 1; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS, diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 9fb5869896be..fff826f56728 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -16,7 +16,8 @@ */ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, unsigned long size, - unsigned long align_shift) + unsigned long align_shift, + u64 alloc_min, u64 alloc_max) { unsigned long align = 1UL << align_shift; u64 first_slot, last_slot, region_end; @@ -29,11 +30,11 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, return 0; region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, - (u64)ULONG_MAX); + alloc_max); if (region_end < size) return 0; - first_slot = round_up(md->phys_addr, align); + first_slot = round_up(max(md->phys_addr, alloc_min), align); last_slot = round_down(region_end - size + 1, align); if (first_slot > last_slot) @@ -53,7 +54,10 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, - unsigned long random_seed) + unsigned long random_seed, + int memory_type, + unsigned long alloc_min, + unsigned long alloc_max) { unsigned long total_slots = 0, target_slot; unsigned long total_mirrored_slots = 0; @@ -75,7 +79,8 @@ efi_status_t efi_random_alloc(unsigned long size, efi_memory_desc_t *md = (void *)map->map + map_offset; unsigned long slots; - slots = get_entry_num_slots(md, size, ilog2(align)); + slots = get_entry_num_slots(md, size, ilog2(align), alloc_min, + alloc_max); MD_NUM_SLOTS(md) = slots; total_slots += slots; if (md->attribute & EFI_MEMORY_MORE_RELIABLE) @@ -114,11 +119,11 @@ efi_status_t efi_random_alloc(unsigned long size, continue; } - target = round_up(md->phys_addr, align) + target_slot * align; + target = round_up(max_t(u64, md->phys_addr, alloc_min), align) + target_slot * align; pages = size / EFI_PAGE_SIZE; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, pages, &target); + memory_type, pages, &target); if (status == EFI_SUCCESS) *addr = target; break; diff --git a/drivers/firmware/efi/libstub/x86-5lvl.c b/drivers/firmware/efi/libstub/x86-5lvl.c new file mode 100644 index 000000000000..479dd445acdc --- /dev/null +++ b/drivers/firmware/efi/libstub/x86-5lvl.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include + +#include +#include +#include + +#include "efistub.h" +#include "x86-stub.h" + +bool efi_no5lvl; + +static void (*la57_toggle)(void *cr3); + +static const struct desc_struct gdt[] = { + [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), + [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), +}; + +/* + * Enabling (or disabling) 5 level paging is tricky, because it can only be + * done from 32-bit mode with paging disabled. This means not only that the + * code itself must be running from 32-bit addressable physical memory, but + * also that the root page table must be 32-bit addressable, as programming + * a 64-bit value into CR3 when running in 32-bit mode is not supported. + */ +efi_status_t efi_setup_5level_paging(void) +{ + u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src; + efi_status_t status; + u8 *la57_code; + + if (!efi_is_64bit()) + return EFI_SUCCESS; + + /* check for 5 level paging support */ + if (native_cpuid_eax(0) < 7 || + !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) + return EFI_SUCCESS; + + /* allocate some 32-bit addressable memory for code and a page table */ + status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code, + U32_MAX); + if (status != EFI_SUCCESS) + return status; + + la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size); + memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size); + + /* + * To avoid the need to allocate a 32-bit addressable stack, the + * trampoline uses a LJMP instruction to switch back to long mode. + * LJMP takes an absolute destination address, which needs to be + * fixed up at runtime. + */ + *(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code; + + efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE); + + return EFI_SUCCESS; +} + +void efi_5level_switch(void) +{ + bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl; + bool have_la57 = native_read_cr4() & X86_CR4_LA57; + bool need_toggle = want_la57 ^ have_la57; + u64 *pgt = (void *)la57_toggle + PAGE_SIZE; + u64 *cr3 = (u64 *)__native_read_cr3(); + u64 *new_cr3; + + if (!la57_toggle || !need_toggle) + return; + + if (!have_la57) { + /* + * 5 level paging will be enabled, so a root level page needs + * to be allocated from the 32-bit addressable physical region, + * with its first entry referring to the existing hierarchy. + */ + new_cr3 = memset(pgt, 0, PAGE_SIZE); + new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC; + } else { + /* take the new root table pointer from the current entry #0 */ + new_cr3 = (u64 *)(cr3[0] & PAGE_MASK); + + /* copy the new root table if it is not 32-bit addressable */ + if ((u64)new_cr3 > U32_MAX) + new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE); + } + + native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt }); + + la57_toggle(new_cr3); +} diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 4f0152b11a89..dc50dda40239 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -15,16 +15,18 @@ #include #include #include +#include +#include #include "efistub.h" +#include "x86-stub.h" -/* Maximum physical address for 64-bit kernel with 4-level paging */ -#define MAXMEM_X86_64_4LEVEL (1ull << 46) +extern char _bss[], _ebss[]; const efi_system_table_t *efi_system_table; const efi_dxe_services_table_t *efi_dxe_table; -extern u32 image_offset; static efi_loaded_image_t *image = NULL; +static efi_memory_attribute_protocol_t *memattr; static efi_status_t preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) @@ -212,8 +214,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) } } -static void -adjust_memory_range_protection(unsigned long start, unsigned long size) +efi_status_t efi_adjust_memory_range_protection(unsigned long start, + unsigned long size) { efi_status_t status; efi_gcd_memory_space_desc_t desc; @@ -221,12 +223,22 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) unsigned long rounded_start, rounded_end; unsigned long unprotect_start, unprotect_size; - if (efi_dxe_table == NULL) - return; - rounded_start = rounddown(start, EFI_PAGE_SIZE); rounded_end = roundup(start + size, EFI_PAGE_SIZE); + if (memattr != NULL) { + status = efi_call_proto(memattr, clear_memory_attributes, + rounded_start, + rounded_end - rounded_start, + EFI_MEMORY_XP); + if (status != EFI_SUCCESS) + efi_warn("Failed to clear EFI_MEMORY_XP attribute\n"); + return status; + } + + if (efi_dxe_table == NULL) + return EFI_SUCCESS; + /* * Don't modify memory region attributes, they are * already suitable, to lower the possibility to @@ -238,7 +250,7 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) status = efi_dxe_call(get_memory_space_descriptor, start, &desc); if (status != EFI_SUCCESS) - return; + break; next = desc.base_address + desc.length; @@ -263,69 +275,26 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) unprotect_start, unprotect_start + unprotect_size, status); + break; } } + return EFI_SUCCESS; } -/* - * Trampoline takes 2 pages and can be loaded in first megabyte of memory - * with its end placed between 128k and 640k where BIOS might start. - * (see arch/x86/boot/compressed/pgtable_64.c) - * - * We cannot find exact trampoline placement since memory map - * can be modified by UEFI, and it can alter the computed address. - */ - -#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024) -#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024) - -void startup_32(struct boot_params *boot_params); - -static void -setup_memory_protection(unsigned long image_base, unsigned long image_size) +static efi_char16_t *efistub_fw_vendor(void) { - /* - * Allow execution of possible trampoline used - * for switching between 4- and 5-level page tables - * and relocated kernel image. - */ + unsigned long vendor = efi_table_attr(efi_system_table, fw_vendor); - adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE, - TRAMPOLINE_PLACEMENT_SIZE); - -#ifdef CONFIG_64BIT - if (image_base != (unsigned long)startup_32) - adjust_memory_range_protection(image_base, image_size); -#else - /* - * Clear protection flags on a whole range of possible - * addresses used for KASLR. We don't need to do that - * on x86_64, since KASLR/extraction is performed after - * dedicated identity page tables are built and we only - * need to remove possible protection on relocated image - * itself disregarding further relocations. - */ - adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, - KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); -#endif + return (efi_char16_t *)vendor; } static const efi_char16_t apple[] = L"Apple"; -static void setup_quirks(struct boot_params *boot_params, - unsigned long image_base, - unsigned long image_size) +static void setup_quirks(struct boot_params *boot_params) { - efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) - efi_table_attr(efi_system_table, fw_vendor); - - if (!memcmp(fw_vendor, apple, sizeof(apple))) { - if (IS_ENABLED(CONFIG_APPLE_PROPERTIES)) - retrieve_apple_device_properties(boot_params); - } - - if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) - setup_memory_protection(image_base, image_size); + if (IS_ENABLED(CONFIG_APPLE_PROPERTIES) && + !memcmp(efistub_fw_vendor(), apple, sizeof(apple))) + retrieve_apple_device_properties(boot_params); } /* @@ -465,6 +434,9 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, efi_status_t status; char *cmdline_ptr; + if (efi_is_native()) + memset(_bss, 0, _ebss - _bss); + efi_system_table = sys_table_arg; /* Check if we were booted by the EFI firmware */ @@ -478,7 +450,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, } image_base = efi_table_attr(image, image_base); - image_offset = (void *)startup_32 - image_base; status = efi_allocate_pages(sizeof(struct boot_params), (unsigned long *)&boot_params, ULONG_MAX); @@ -760,85 +731,139 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) return EFI_SUCCESS; } -/* - * On success, we return the address of startup_32, which has potentially been - * relocated by efi_relocate_kernel. - * On failure, we exit to the firmware via efi_exit instead of returning. - */ -asmlinkage unsigned long efi_main(efi_handle_t handle, - efi_system_table_t *sys_table_arg, - struct boot_params *boot_params) +static bool have_unsupported_snp_features(void) { - unsigned long bzimage_addr = (unsigned long)startup_32; - unsigned long buffer_start, buffer_end; + u64 unsupported; + + unsupported = snp_get_unsupported_features(sev_get_status()); + if (unsupported) { + efi_err("Unsupported SEV-SNP features detected: 0x%llx\n", + unsupported); + return true; + } + return false; +} + +static void efi_get_seed(void *seed, int size) +{ + efi_get_random_bytes(size, seed); + + /* + * This only updates seed[0] when running on 32-bit, but in that case, + * seed[1] is not used anyway, as there is no virtual KASLR on 32-bit. + */ + *(unsigned long *)seed ^= kaslr_get_random_long("EFI"); +} + +static void error(char *str) +{ + efi_warn("Decompression failed: %s\n", str); +} + +static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) +{ + unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + unsigned long addr, alloc_size, entry; + efi_status_t status; + u32 seed[2] = {}; + + /* determine the required size of the allocation */ + alloc_size = ALIGN(max_t(unsigned long, output_len, kernel_total_size), + MIN_KERNEL_ALIGN); + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) { + u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size; + static const efi_char16_t ami[] = L"American Megatrends"; + + efi_get_seed(seed, sizeof(seed)); + + virt_addr += (range * seed[1]) >> 32; + virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1); + + /* + * Older Dell systems with AMI UEFI firmware v2.0 may hang + * while decompressing the kernel if physical address + * randomization is enabled. + * + * https://bugzilla.kernel.org/show_bug.cgi?id=218173 + */ + if (efi_system_table->hdr.revision <= EFI_2_00_SYSTEM_TABLE_REVISION && + !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) { + efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n"); + seed[0] = 0; + } + + boot_params_ptr->hdr.loadflags |= KASLR_FLAG; + } + + status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, + seed[0], EFI_LOADER_CODE, + LOAD_PHYSICAL_ADDR, + EFI_X86_KERNEL_ALLOC_LIMIT); + if (status != EFI_SUCCESS) + return status; + + entry = decompress_kernel((void *)addr, virt_addr, error); + if (entry == ULONG_MAX) { + efi_free(alloc_size, addr); + return EFI_LOAD_ERROR; + } + + *kernel_entry = addr + entry; + + return efi_adjust_memory_range_protection(addr, kernel_total_size); +} + +static void __noreturn enter_kernel(unsigned long kernel_addr, + struct boot_params *boot_params) +{ + /* enter decompressed kernel with boot_params pointer in RSI/ESI */ + asm("jmp *%0"::"r"(kernel_addr), "S"(boot_params)); + + unreachable(); +} + +/* + * On success, this routine will jump to the relocated image directly and never + * return. On failure, it will exit to the firmware via efi_exit() instead of + * returning. + */ +void __noreturn efi_stub_entry(efi_handle_t handle, + efi_system_table_t *sys_table_arg, + struct boot_params *boot_params) +{ + efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID; struct setup_header *hdr = &boot_params->hdr; const struct linux_efi_initrd *initrd = NULL; + unsigned long kernel_entry; efi_status_t status; + boot_params_ptr = boot_params; + efi_system_table = sys_table_arg; /* Check if we were booted by the EFI firmware */ if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) efi_exit(handle, EFI_INVALID_PARAMETER); - efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); - if (efi_dxe_table && - efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) { - efi_warn("Ignoring DXE services table: invalid signature\n"); - efi_dxe_table = NULL; + if (have_unsupported_snp_features()) + efi_exit(handle, EFI_UNSUPPORTED); + + if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) { + efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); + if (efi_dxe_table && + efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) { + efi_warn("Ignoring DXE services table: invalid signature\n"); + efi_dxe_table = NULL; + } } - /* - * If the kernel isn't already loaded at a suitable address, - * relocate it. - * - * It must be loaded above LOAD_PHYSICAL_ADDR. - * - * The maximum address for 64-bit is 1 << 46 for 4-level paging. This - * is defined as the macro MAXMEM, but unfortunately that is not a - * compile-time constant if 5-level paging is configured, so we instead - * define our own macro for use here. - * - * For 32-bit, the maximum address is complicated to figure out, for - * now use KERNEL_IMAGE_SIZE, which will be 512MiB, the same as what - * KASLR uses. - * - * Also relocate it if image_offset is zero, i.e. the kernel wasn't - * loaded by LoadImage, but rather by a bootloader that called the - * handover entry. The reason we must always relocate in this case is - * to handle the case of systemd-boot booting a unified kernel image, - * which is a PE executable that contains the bzImage and an initrd as - * COFF sections. The initrd section is placed after the bzImage - * without ensuring that there are at least init_size bytes available - * for the bzImage, and thus the compressed kernel's startup code may - * overwrite the initrd unless it is moved out of the way. - */ + /* grab the memory attributes protocol if it exists */ + efi_bs_call(locate_protocol, &guid, NULL, (void **)&memattr); - buffer_start = ALIGN(bzimage_addr - image_offset, - hdr->kernel_alignment); - buffer_end = buffer_start + hdr->init_size; - - if ((buffer_start < LOAD_PHYSICAL_ADDR) || - (IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE) || - (IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) || - (image_offset == 0)) { - extern char _bss[]; - - status = efi_relocate_kernel(&bzimage_addr, - (unsigned long)_bss - bzimage_addr, - hdr->init_size, - hdr->pref_address, - hdr->kernel_alignment, - LOAD_PHYSICAL_ADDR); - if (status != EFI_SUCCESS) { - efi_err("efi_relocate_kernel() failed!\n"); - goto fail; - } - /* - * Now that we've copied the kernel elsewhere, we no longer - * have a set up block before startup_32(), so reset image_offset - * to zero in case it was set earlier. - */ - image_offset = 0; + status = efi_setup_5level_paging(); + if (status != EFI_SUCCESS) { + efi_err("efi_setup_5level_paging() failed!\n"); + goto fail; } #ifdef CONFIG_CMDLINE_BOOL @@ -858,6 +883,12 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, } } + status = efi_decompress_kernel(&kernel_entry); + if (status != EFI_SUCCESS) { + efi_err("Failed to decompress kernel\n"); + goto fail; + } + /* * At this point, an initrd may already have been loaded by the * bootloader and passed via bootparams. We permit an initrd loaded @@ -897,7 +928,7 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, setup_efi_pci(boot_params); - setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start); + setup_quirks(boot_params); status = exit_boot(boot_params, handle); if (status != EFI_SUCCESS) { @@ -905,9 +936,36 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, goto fail; } - return bzimage_addr; + /* + * Call the SEV init code while still running with the firmware's + * GDT/IDT, so #VC exceptions will be handled by EFI. + */ + sev_enable(boot_params); + + efi_5level_switch(); + + enter_kernel(kernel_entry, boot_params); fail: - efi_err("efi_main() failed!\n"); + efi_err("efi_stub_entry() failed!\n"); efi_exit(handle, status); } + +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL +void efi_handover_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params) +{ + memset(_bss, 0, _ebss - _bss); + efi_stub_entry(handle, sys_table_arg, boot_params); +} + +#ifndef CONFIG_EFI_MIXED +extern __alias(efi_handover_entry) +void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params); + +extern __alias(efi_handover_entry) +void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params); +#endif +#endif diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h new file mode 100644 index 000000000000..1c20e99a6494 --- /dev/null +++ b/drivers/firmware/efi/libstub/x86-stub.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +extern void trampoline_32bit_src(void *, bool); +extern const u16 trampoline_ljmp_imm_offset; + +efi_status_t efi_adjust_memory_range_protection(unsigned long start, + unsigned long size); + +#ifdef CONFIG_X86_64 +efi_status_t efi_setup_5level_paging(void); +void efi_5level_switch(void); +#else +static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; } +static inline void efi_5level_switch(void) {} +#endif diff --git a/drivers/firmware/efi/riscv-runtime.c b/drivers/firmware/efi/riscv-runtime.c index d0daacd2c903..6b142aa35389 100644 --- a/drivers/firmware/efi/riscv-runtime.c +++ b/drivers/firmware/efi/riscv-runtime.c @@ -85,7 +85,7 @@ static int __init riscv_enable_runtime_services(void) efi_memory_desc_t *md; for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res; if (!(md->attribute & EFI_MEMORY_SP)) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 0ba9f18312f5..4ca256bcd697 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -66,19 +66,28 @@ int efivars_register(struct efivars *efivars, const struct efivar_operations *ops, struct kobject *kobject) { + int rv; + if (down_interruptible(&efivars_lock)) return -EINTR; + if (__efivars) { + pr_warn("efivars already registered\n"); + rv = -EBUSY; + goto out; + } + efivars->ops = ops; efivars->kobject = kobject; __efivars = efivars; pr_info("Registered efivars operations\n"); - + rv = 0; +out: up(&efivars_lock); - return 0; + return rv; } EXPORT_SYMBOL_GPL(efivars_register); diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index a14871d68e5f..3f0c3f2ce51b 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -679,7 +679,8 @@ config GPIO_UNIPHIER Say yes here to support UniPhier GPIOs. config GPIO_VF610 - def_bool y + bool "VF610 GPIO support" + default y if SOC_VF610 depends on ARCH_MXC select GPIOLIB_IRQCHIP help diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index e00c33310517..753e7be039e4 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -127,8 +127,6 @@ static int gen_74x164_probe(struct spi_device *spi) if (IS_ERR(chip->gpiod_oe)) return PTR_ERR(chip->gpiod_oe); - gpiod_set_value_cansleep(chip->gpiod_oe, 1); - spi_set_drvdata(spi, chip); chip->gpio_chip.label = spi->modalias; @@ -153,6 +151,8 @@ static int gen_74x164_probe(struct spi_device *spi) goto exit_destroy; } + gpiod_set_value_cansleep(chip->gpiod_oe, 1); + ret = gpiochip_add_data(&chip->gpio_chip, chip); if (!ret) return 0; diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index 8d722e026e9c..c2857ed0c78a 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -318,20 +318,27 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) switch (flow_type) { case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 1); + sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IC, 1); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 0); + sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IC, 1); break; case IRQ_TYPE_EDGE_RISING: case IRQ_TYPE_EDGE_FALLING: case IRQ_TYPE_EDGE_BOTH: state = sprd_eic_get(chip, offset); - if (state) + if (state) { sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 0); - else + sprd_eic_update(chip, offset, + SPRD_EIC_DBNC_IC, 1); + } else { sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 1); + sprd_eic_update(chip, offset, + SPRD_EIC_DBNC_IC, 1); + } break; default: return -ENOTSUPP; @@ -343,20 +350,27 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) switch (flow_type) { case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTCLR, 1); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTCLR, 1); break; case IRQ_TYPE_EDGE_RISING: case IRQ_TYPE_EDGE_FALLING: case IRQ_TYPE_EDGE_BOTH: state = sprd_eic_get(chip, offset); - if (state) + if (state) { sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 0); - else + sprd_eic_update(chip, offset, + SPRD_EIC_LATCH_INTCLR, 1); + } else { sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 1); + sprd_eic_update(chip, offset, + SPRD_EIC_LATCH_INTCLR, 1); + } break; default: return -ENOTSUPP; @@ -370,29 +384,34 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; default: @@ -405,29 +424,34 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; default: diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 787d47e667ad..33d73687e463 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1626,6 +1626,20 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { .ignore_wake = "ELAN0415:00@9", }, }, + { + /* + * Spurious wakeups from TP_ATTN# pin + * Found in BIOS 0.35 + * https://gitlab.freedesktop.org/drm/amd/-/issues/3073 + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_MATCH(DMI_PRODUCT_NAME, "G1619-04"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "PNP0C50:00@8", + }, + }, {} /* Terminating entry */ }; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 6d3e3454a6ed..9d8c78312403 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -784,11 +784,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, ret = gpiochip_irqchip_init_valid_mask(gc); if (ret) - goto err_remove_acpi_chip; + goto err_free_hogs; ret = gpiochip_irqchip_init_hw(gc); if (ret) - goto err_remove_acpi_chip; + goto err_remove_irqchip_mask; ret = gpiochip_add_irqchip(gc, lock_key, request_key); if (ret) @@ -813,13 +813,13 @@ err_remove_irqchip: gpiochip_irqchip_remove(gc); err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(gc); -err_remove_acpi_chip: - acpi_gpiochip_remove(gc); -err_remove_of_chip: +err_free_hogs: gpiochip_free_hogs(gc); + acpi_gpiochip_remove(gc); + gpiochip_remove_pin_ranges(gc); +err_remove_of_chip: of_gpiochip_remove(gc); err_free_gpiochip_mask: - gpiochip_remove_pin_ranges(gc); gpiochip_free_valid_mask(gc); if (gdev->dev.release) { /* release() has been registered by gpiochip_setup_dev() */ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 2b97b8a96fb4..fa6193535d48 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -333,6 +333,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, { struct list_head *reset_device_list = reset_context->reset_device_list; struct amdgpu_device *tmp_adev = NULL; + struct amdgpu_ras *con; int r; if (reset_device_list == NULL) @@ -358,7 +359,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, */ amdgpu_register_gpu_instance(tmp_adev); - /* Resume RAS */ + /* Resume RAS, ecc_irq */ + con = amdgpu_ras_get_context(tmp_adev); + if (!amdgpu_sriov_vf(tmp_adev) && con) { + if (tmp_adev->sdma.ras && + tmp_adev->sdma.ras->ras_block.ras_late_init) { + r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev, + &tmp_adev->sdma.ras->ras_block.ras_comm); + if (r) { + dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r); + goto end; + } + } + + if (tmp_adev->gfx.ras && + tmp_adev->gfx.ras->ras_block.ras_late_init) { + r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev, + &tmp_adev->gfx.ras->ras_block.ras_comm); + if (r) { + dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r); + goto end; + } + } + } + amdgpu_ras_resume(tmp_adev); /* Update PSP FW topology after reset */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c46c6fbd235e..e636c7850f77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -999,6 +999,8 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; + /* indicate amdgpu suspension status */ + bool suspend_complete; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 469785d33791..1ef758ac5076 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -90,7 +90,7 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) return NULL; fence = container_of(f, struct amdgpu_amdkfd_fence, base); - if (fence && f->ops == &amdkfd_fence_ops) + if (f->ops == &amdkfd_fence_ops) return fence; return NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a5352e5e2bd4..6a4749c0c5a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1310,6 +1310,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) return true; fw_ver = *((uint32_t *)adev->pm.fw->data + 69); + release_firmware(adev->pm.fw); if (fw_ver < 0x00160e00) return true; } @@ -4202,7 +4203,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); cancel_delayed_work_sync(&adev->delayed_init_work); - flush_delayed_work(&adev->gfx.gfx_off_delay_work); amdgpu_ras_suspend(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index a8e1f2cfe12d..f24c3a20e901 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2202,8 +2202,6 @@ retry_init: pci_wake_from_d3(pdev, TRUE); - pci_wake_from_d3(pdev, TRUE); - /* * For runpm implemented via BACO, PMFW will handle the * timing for BACO in and out: @@ -2416,6 +2414,7 @@ static int amdgpu_pmops_suspend(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = false; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) @@ -2430,6 +2429,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = true; if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 23f0067f92e4..b803e785d3af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -585,8 +585,15 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) { - schedule_delayed_work(&adev->gfx.gfx_off_delay_work, + /* If going to s2idle, no need to wait */ + if (adev->in_s0ix) { + if (!amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GFX, true)) + adev->gfx.gfx_off_state = true; + } else { + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, delay); + } } } else { if (adev->gfx.gfx_off_req_count == 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 2bc791ed8830..ea0fb079f942 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -808,19 +808,26 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) * seconds, so here, we just pick up three parts for emulation. */ ret = memcmp(vram_ptr, cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } ret = memcmp(vram_ptr + (size / 2), cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } ret = memcmp(vram_ptr + size - 10, cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } +release_buffer: amdgpu_bo_free_kernel(&vram_bo, &vram_gpu, &vram_ptr); - return 0; + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 0ee7c935fba1..cde2fd2f7117 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1222,19 +1222,15 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, * amdgpu_bo_move_notify - notification about a memory move * @bo: pointer to a buffer object * @evict: if this move is evicting the buffer from the graphics address space - * @new_mem: new information of the bufer object * * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs * bookkeeping. * TTM driver callback which is called when ttm moves a buffer. */ -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, - struct ttm_resource *new_mem) +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo; - struct ttm_resource *old_mem = bo->resource; if (!amdgpu_bo_is_amdgpu_bo(bo)) return; @@ -1251,13 +1247,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, /* remember the eviction */ if (evict) atomic64_inc(&adev->num_evictions); - - /* update statistics */ - if (!new_mem) - return; - - /* move_notify is called before move happens */ - trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); } void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 6dcd7bab42fb..2ada421e79e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -312,9 +312,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, size_t buffer_size, uint32_t *metadata_size, uint64_t *flags); -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, - struct ttm_resource *new_mem); +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict); void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 090e66a1b284..54bdbd83a8cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -191,7 +191,8 @@ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, /* Never sync to VM updates either. */ if (fence_owner == AMDGPU_FENCE_OWNER_VM && - owner != AMDGPU_FENCE_OWNER_UNDEFINED) + owner != AMDGPU_FENCE_OWNER_UNDEFINED && + owner != AMDGPU_FENCE_OWNER_KFD) return false; /* Ignore fences depending on the sync mode */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 10469f20a10c..dfb9d4200773 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -555,10 +555,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; } + trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); out: /* update statistics */ atomic64_add(bo->base.size, &adev->num_bytes_moved); - amdgpu_bo_move_notify(bo, evict, new_mem); + amdgpu_bo_move_notify(bo, evict); return 0; } @@ -837,6 +838,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, gtt->ttm.dma_address, flags); } + gtt->bound = true; } /* @@ -1503,7 +1505,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, static void amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) { - amdgpu_bo_move_notify(bo, false, NULL); + amdgpu_bo_move_notify(bo, false); } static struct ttm_device_funcs amdgpu_bo_driver = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 6e7058a2d1c8..779707f19c88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1110,9 +1110,13 @@ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, if (err) return -ENODEV; + err = amdgpu_ucode_validate(*fw); - if (err) + if (err) { dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name); + release_firmware(*fw); + *fw = NULL; + } return err; } diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 1c5d9388ad0b..cb6eb47aab65 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -313,7 +313,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, DEBUG("IMM 0x%02X\n", val); return val; } - return 0; + break; case ATOM_ARG_PLL: idx = U8(*ptr); (*ptr)++; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index df385ffc9768..6578ca1b90af 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(mmIH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(mmIH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(mmIH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index b8c47e0cf37a..c19681492efa 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 84ca601f7d5f..195b29892354 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3064,6 +3064,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) gfx_v9_0_cp_gfx_enable(adev, true); + /* Now only limit the quirk on the APU gfx9 series and already + * confirmed that the APU gfx10/gfx11 needn't such update. + */ + if (adev->flags & AMD_IS_APU && + adev->in_s3 && !adev->suspend_complete) { + DRM_INFO(" Will skip the CSB packet resubmit\n"); + return 0; + } r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d96ee48e1706..35921b41fc27 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1144,6 +1144,10 @@ static int gmc_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 7124347d2b6c..310a5607d83b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -951,6 +951,11 @@ static int gmc_v11_0_hw_fini(void *handle) } amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + gmc_v11_0_gart_disable(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index ec291d28edff..345f0c9f551c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -921,8 +921,8 @@ static int gmc_v6_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v6_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 979da6f510e8..12411f4c1b9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1110,8 +1110,8 @@ static int gmc_v7_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v7_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 382dde1ce74c..cec9926e8bdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1240,8 +1240,8 @@ static int gmc_v8_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v8_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 0d9e9d9dd4a1..3e631aefa791 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1861,8 +1861,8 @@ static int gmc_v9_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } /** @@ -1900,6 +1900,10 @@ static int gmc_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index aecad530b10a..2c02ae69883d 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 7cd79a3844b2..657e4ca6f9dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -417,6 +417,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index eec13cb5bf75..84e8e8b008ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 9a24f17a5750..cada9f300a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(IH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(IH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(IH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 811dd3ea6362..c373a2a3248e 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -584,11 +584,34 @@ soc15_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE1; } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_asic_reset(struct amdgpu_device *adev) { /* original raven doesn't have full asic reset */ - if ((adev->apu_flags & AMD_APU_IS_RAVEN) || - (adev->apu_flags & AMD_APU_IS_RAVEN2)) + /* On the latest Raven, the GPU reset can be performed + * successfully. So now, temporarily enable it for the + * S3 suspend abort case. + */ + if (((adev->apu_flags & AMD_APU_IS_RAVEN) || + (adev->apu_flags & AMD_APU_IS_RAVEN2)) && + !soc15_need_reset_on_resume(adev)) return 0; switch (soc15_asic_reset_method(adev)) { @@ -1289,6 +1312,10 @@ static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc15_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n"); + soc15_asic_reset(adev); + } return soc15_common_hw_init(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index b08905d1c00f..07a5d95be07f 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 1e83db0c5438..74c94df42345 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 59dfca093155..f1ba76c35cd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -424,6 +424,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ec8a576ac5a9..3c7d267f2a07 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1349,7 +1349,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) { - return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) || + return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 2) || (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 208812512d8a..7fa5e70f1aac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -380,14 +380,9 @@ static void svm_range_bo_release(struct kref *kref) spin_lock(&svm_bo->list_lock); } spin_unlock(&svm_bo->list_lock); - if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) { - /* We're not in the eviction worker. - * Signal the fence and synchronize with any - * pending eviction work. - */ + if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) + /* We're not in the eviction worker. Signal the fence. */ dma_fence_signal(&svm_bo->eviction_fence->base); - cancel_work_sync(&svm_bo->eviction_work); - } dma_fence_put(&svm_bo->eviction_fence->base); amdgpu_bo_unref(&svm_bo->bo); kfree(svm_bo); @@ -2246,8 +2241,10 @@ retry: mutex_unlock(&svms->lock); mmap_write_unlock(mm); - /* Pairs with mmget in svm_range_add_list_work */ - mmput(mm); + /* Pairs with mmget in svm_range_add_list_work. If dropping the + * last mm refcount, schedule release work to avoid circular locking + */ + mmput_async(mm); spin_lock(&svms->deferred_list_lock); } @@ -2556,6 +2553,7 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, { struct vm_area_struct *vma; struct interval_tree_node *node; + struct rb_node *rb_node; unsigned long start_limit, end_limit; vma = find_vma(p->mm, addr << PAGE_SHIFT); @@ -2578,16 +2576,15 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, if (node) { end_limit = min(end_limit, node->start); /* Last range that ends before the fault address */ - node = container_of(rb_prev(&node->rb), - struct interval_tree_node, rb); + rb_node = rb_prev(&node->rb); } else { /* Last range must end before addr because * there was no range after addr */ - node = container_of(rb_last(&p->svms.objects.rb_root), - struct interval_tree_node, rb); + rb_node = rb_last(&p->svms.objects.rb_root); } - if (node) { + if (rb_node) { + node = container_of(rb_node, struct interval_tree_node, rb); if (node->last >= addr) { WARN(1, "Overlap with prev node and page fault addr\n"); return -EFAULT; @@ -3310,13 +3307,14 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) { - if (!fence) - return -EINVAL; - - if (dma_fence_is_signaled(&fence->base)) - return 0; - - if (fence->svm_bo) { + /* Dereferencing fence->svm_bo is safe here because the fence hasn't + * signaled yet and we're under the protection of the fence->lock. + * After the fence is signaled in svm_range_bo_release, we cannot get + * here any more. + * + * Reference is dropped in svm_range_evict_svm_bo_worker. + */ + if (svm_bo_ref_unless_zero(fence->svm_bo)) { WRITE_ONCE(fence->svm_bo->evicting, 1); schedule_work(&fence->svm_bo->eviction_work); } @@ -3331,8 +3329,6 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) int r = 0; svm_bo = container_of(work, struct svm_range_bo, eviction_work); - if (!svm_bo_ref_unless_zero(svm_bo)) - return; /* svm_bo was freed while eviction was pending */ if (mmget_not_zero(svm_bo->eviction_fence->mm)) { mm = svm_bo->eviction_fence->mm; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 705d9e91b5aa..029916971bf6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1513,17 +1513,19 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev, /* CPU->CPU link*/ cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to); if (cpu_dev) { - list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) - if (iolink3->node_to == iolink2->node_to) - break; + list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) { + if (iolink3->node_to != iolink2->node_to) + continue; - props->weight += iolink3->weight; - props->min_latency += iolink3->min_latency; - props->max_latency += iolink3->max_latency; - props->min_bandwidth = min(props->min_bandwidth, - iolink3->min_bandwidth); - props->max_bandwidth = min(props->max_bandwidth, - iolink3->max_bandwidth); + props->weight += iolink3->weight; + props->min_latency += iolink3->min_latency; + props->max_latency += iolink3->max_latency; + props->min_bandwidth = min(props->min_bandwidth, + iolink3->min_bandwidth); + props->max_bandwidth = min(props->max_bandwidth, + iolink3->max_bandwidth); + break; + } } else { WARN(1, "CPU node not found"); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 13e0b521e3db..ff460c9802eb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2255,6 +2255,7 @@ static int dm_sw_fini(void *handle) if (adev->dm.dmub_srv) { dmub_srv_destroy(adev->dm.dmub_srv); + kfree(adev->dm.dmub_srv); adev->dm.dmub_srv = NULL; } @@ -5937,6 +5938,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false; int mode_refresh; int preferred_refresh = 0; + enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN; #if defined(CONFIG_DRM_AMD_DC_DCN) struct dsc_dec_dpcd_caps dsc_caps; #endif @@ -6001,7 +6003,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (recalculate_timing) { freesync_mode = get_highest_refresh_rate_mode(aconnector, false); drm_mode_copy(&saved_mode, &mode); + saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio; drm_mode_copy(&mode, freesync_mode); + mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio; } else { decide_crtc_timing_for_drm_display_mode( &mode, preferred_mode, scale); @@ -6068,7 +6072,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED) stream->use_vsc_sdp_for_colorimetry = true; } - mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space); + if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) + tf = TRANSFER_FUNC_GAMMA_22; + mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf); aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; } @@ -6677,8 +6683,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, if (IS_ERR(mst_state)) return PTR_ERR(mst_state); - if (!mst_state->pbn_div) - mst_state->pbn_div = dm_mst_get_pbn_divider(aconnector->mst_port->dc_link); + mst_state->pbn_div = dm_mst_get_pbn_divider(aconnector->mst_port->dc_link); if (!state->duplicated) { int max_bpc = conn_state->max_requested_bpc; @@ -10118,11 +10123,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } #if defined(CONFIG_DRM_AMD_DC_DCN) - ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); - if (ret) { - DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; - goto fail; + if (dc_resource_is_dsc_encoding_supported(dc)) { + ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); + if (ret) { + DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); + ret = -EINVAL; + goto fail; + } } ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars); @@ -10496,14 +10503,23 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (range->flags != 1) continue; - amdgpu_dm_connector->min_vfreq = range->min_vfreq; - amdgpu_dm_connector->max_vfreq = range->max_vfreq; - amdgpu_dm_connector->pixel_clock_mhz = - range->pixel_clock_mhz * 10; - connector->display_info.monitor_range.min_vfreq = range->min_vfreq; connector->display_info.monitor_range.max_vfreq = range->max_vfreq; + if (edid->revision >= 4) { + if (data->pad2 & DRM_EDID_RANGE_OFFSET_MIN_VFREQ) + connector->display_info.monitor_range.min_vfreq += 255; + if (data->pad2 & DRM_EDID_RANGE_OFFSET_MAX_VFREQ) + connector->display_info.monitor_range.max_vfreq += 255; + } + + amdgpu_dm_connector->min_vfreq = + connector->display_info.monitor_range.min_vfreq; + amdgpu_dm_connector->max_vfreq = + connector->display_info.monitor_range.max_vfreq; + amdgpu_dm_connector->pixel_clock_mhz = + range->pixel_clock_mhz * 10; + break; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index ee242d9d8b06..ff7dd17ad076 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1358,7 +1358,7 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf, const uint32_t rd_buf_size = 10; struct pipe_ctx *pipe_ctx; ssize_t result = 0; - int i, r, str_len = 30; + int i, r, str_len = 10; rd_buf = kcalloc(rd_buf_size, sizeof(char), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index e43b4d7dc60e..c8136a05a8d3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -131,30 +131,27 @@ static int dcn314_get_active_display_cnt_wa( return display_count; } -static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable) +static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, + bool safe_to_lower, bool disable) { struct dc *dc = clk_mgr_base->ctx->dc; int i; for (i = 0; i < dc->res_pool->pipe_count; ++i) { - struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe = safe_to_lower + ? &context->res_ctx.pipe_ctx[i] + : &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { - struct stream_encoder *stream_enc = pipe->stream_res.stream_enc; - if (disable) { - if (stream_enc && stream_enc->funcs->disable_fifo) - pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc); + if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); - pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); } else { pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg); - - if (stream_enc && stream_enc->funcs->enable_fifo) - pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc); } } } @@ -254,11 +251,11 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - dcn314_disable_otg_wa(clk_mgr_base, context, true); + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - dcn314_disable_otg_wa(clk_mgr_base, context, false); + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 7a309547c2b3..f415733f1a97 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1903,6 +1903,10 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c wait_for_no_pipes_pending(dc, context); /* pplib is notified if disp_num changed */ dc->hwss.optimize_bandwidth(dc, context); + /* Need to do otg sync again as otg could be out of sync due to otg + * workaround applied during clock update + */ + dc_trigger_sync(dc, context); } if (dc->hwss.update_dsc_pg) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c index af110bf9470f..aefca9756dbe 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c @@ -202,7 +202,7 @@ enum dc_status core_link_read_dpcd( uint32_t extended_size; /* size of the remaining partitioned address space */ uint32_t size_left_to_read; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; /* size of the next partition to be read from */ uint32_t partition_size; uint32_t data_index = 0; @@ -231,7 +231,7 @@ enum dc_status core_link_write_dpcd( { uint32_t partition_size; uint32_t data_index = 0; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; while (size) { partition_size = dpcd_get_next_partition_size(address, size); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 66923f51037a..e2f80cd0ca8c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3038,6 +3038,12 @@ static void set_avi_info_frame( hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED; } + if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR && + stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) { + hdmi_info.bits.EC0_EC2 = 0; + hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709; + } + /* TODO: un-hardcode aspect ratio */ aspect = stream->timing.aspect_ratio; diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 46c2b991aa10..811c117665e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -244,7 +244,7 @@ enum pixel_format { #define DC_MAX_DIRTY_RECTS 3 struct dc_flip_addrs { struct dc_plane_address address; - unsigned int flip_timestamp_in_us; + unsigned long long flip_timestamp_in_us; bool flip_immediate; /* TODO: add flip duration for FreeSync */ bool triplebuffer_flips; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 009b5861a3fe..d6c5d48c878e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1854,6 +1854,9 @@ bool dcn10_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx, { struct dpp *dpp = pipe_ctx->plane_res.dpp; + if (!stream) + return false; + if (dpp == NULL) return false; @@ -1876,8 +1879,8 @@ bool dcn10_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx, } else dpp->funcs->dpp_program_regamma_pwl(dpp, NULL, OPP_REGAMMA_BYPASS); - if (stream != NULL && stream->ctx != NULL && - stream->out_transfer_func != NULL) { + if (stream->ctx && + stream->out_transfer_func) { log_tf(stream->ctx, stream->out_transfer_func, dpp->regamma_params.hw_points_num); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 72bec33e371f..0225b2c96041 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -651,10 +651,20 @@ void dcn30_set_avmute(struct pipe_ctx *pipe_ctx, bool enable) if (pipe_ctx == NULL) return; - if (dc_is_hdmi_signal(pipe_ctx->stream->signal) && pipe_ctx->stream_res.stream_enc != NULL) + if (dc_is_hdmi_signal(pipe_ctx->stream->signal) && pipe_ctx->stream_res.stream_enc != NULL) { pipe_ctx->stream_res.stream_enc->funcs->set_avmute( pipe_ctx->stream_res.stream_enc, enable); + + /* Wait for two frame to make sure AV mute is sent out */ + if (enable) { + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + } + } } void dcn30_update_info_frame(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index f04595b750ab..5ec3f50a72ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -1014,7 +1014,7 @@ static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id vpg = dcn301_vpg_create(ctx, vpg_inst); afmt = dcn301_afmt_create(ctx, afmt_inst); - if (!enc1 || !vpg || !afmt) { + if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) { kfree(enc1); kfree(vpg); kfree(afmt); diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index ca7d24000621..6c7b286e1123 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -51,8 +51,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) @@ -60,11 +64,11 @@ ifdef CONFIG_DRM_AMD_DC_DCN CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 19f55657272e..cc8c1a48c5c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -810,6 +810,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman (v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKPerState[mode_lib->vba.VoltageLevel] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ? mode_lib->vba.ip.min_prefetch_in_strobe_us : 0, + mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false, + /* Output */ &v->DSTXAfterScaler[k], &v->DSTYAfterScaler[k], @@ -3291,6 +3293,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SwathHeightCThisState[k], v->TWait, (v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKState[i][j] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ? mode_lib->vba.ip.min_prefetch_in_strobe_us : 0, + mode_lib->vba.PrefetchModePerState[i][j] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false, /* Output */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 23e4be2ad63f..7f4fc49be35c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -3418,6 +3418,7 @@ bool dml32_CalculatePrefetchSchedule( unsigned int SwathHeightC, double TWait, double TPreReq, + bool ExtendPrefetchIfPossible, /* Output */ double *DSTXAfterScaler, double *DSTYAfterScaler, @@ -3887,12 +3888,32 @@ bool dml32_CalculatePrefetchSchedule( /* Clamp to oto for bandwidth calculation */ LinesForPrefetchBandwidth = dst_y_prefetch_oto; } else { - *DestinationLinesForPrefetch = dst_y_prefetch_equ; - TimeForFetchingMetaPTE = Tvm_equ; - TimeForFetchingRowInVBlank = Tr0_equ; - *PrefetchBandwidth = prefetch_bw_equ; - /* Clamp to equ for bandwidth calculation */ - LinesForPrefetchBandwidth = dst_y_prefetch_equ; + /* For mode programming we want to extend the prefetch as much as possible + * (up to oto, or as long as we can for equ) if we're not already applying + * the 60us prefetch requirement. This is to avoid intermittent underflow + * issues during prefetch. + * + * The prefetch extension is applied under the following scenarios: + * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank) + * 2. We're using subvp or drr methods of p-state switch, in which case we + * we don't care if prefetch takes up more of the blanking time + * + * Mode programming typically chooses the smallest prefetch time possible + * (i.e. highest bandwidth during prefetch) presumably to create margin between + * p-states / c-states that happen in vblank and prefetch. Therefore we only + * apply this prefetch extension when p-state in vblank is not required (UCLK + * p-states take up the most vblank time). + */ + if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) { + MyError = true; + } else { + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + TimeForFetchingMetaPTE = Tvm_equ; + TimeForFetchingRowInVBlank = Tr0_equ; + *PrefetchBandwidth = prefetch_bw_equ; + /* Clamp to equ for bandwidth calculation */ + LinesForPrefetchBandwidth = dst_y_prefetch_equ; + } } *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 779c6805f599..1823434d8ede 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -744,6 +744,7 @@ bool dml32_CalculatePrefetchSchedule( unsigned int SwathHeightC, double TWait, double TPreReq, + bool ExtendPrefetchIfPossible, /* Output */ double *DSTXAfterScaler, double *DSTYAfterScaler, diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index ee67a35c2a8e..ff930a71e496 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -513,6 +513,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_create_session(struct mod_hdcp *hdcp) hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); + if (!display) + return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND; + hdcp_cmd->in_msg.hdcp2_create_session_v2.display_handle = display->index; if (hdcp->connection.link.adjust.hdcp2.force_type == MOD_HDCP_FORCE_TYPE_0) diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h index 1d8b746b02f2..edf5845f6a1f 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h @@ -35,7 +35,8 @@ struct mod_vrr_params; void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet, - enum dc_color_space cs); + enum dc_color_space cs, + enum color_transfer_func tf); void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet); diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 27ceba9d6d65..69691058ab89 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -132,7 +132,8 @@ enum ColorimetryYCCDP { void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet, - enum dc_color_space cs) + enum dc_color_space cs, + enum color_transfer_func tf) { unsigned int vsc_packet_revision = vsc_packet_undefined; unsigned int i; @@ -382,6 +383,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, colorimetryFormat = ColorimetryYCC_DP_AdobeYCC; else if (cs == COLOR_SPACE_2020_YCBCR) colorimetryFormat = ColorimetryYCC_DP_ITU2020YCbCr; + + if (cs == COLOR_SPACE_2020_YCBCR && tf == TRANSFER_FUNC_GAMMA_22) + colorimetryFormat = ColorimetryYCC_DP_ITU709; break; default: diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 29f3d8431089..cdb406690b7e 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2344,6 +2344,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); int err, ret; + u32 pwm_mode; int value; if (amdgpu_in_reset(adev)) @@ -2355,13 +2356,22 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, if (err) return err; + if (value == 0) + pwm_mode = AMD_FAN_CTRL_NONE; + else if (value == 1) + pwm_mode = AMD_FAN_CTRL_MANUAL; + else if (value == 2) + pwm_mode = AMD_FAN_CTRL_AUTO; + else + return -EINVAL; + ret = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (ret < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return ret; } - ret = amdgpu_dpm_set_fan_control_mode(adev, value); + ret = amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index dc0a6fba7050..ff1032de4f76 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev) return 0; } +static int si_set_temperature_range(struct amdgpu_device *adev) +{ + int ret; + + ret = si_thermal_enable_alert(adev, false); + if (ret) + return ret; + ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) + return ret; + ret = si_thermal_enable_alert(adev, true); + if (ret) + return ret; + + return ret; +} + static void si_dpm_disable(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); @@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, static int si_dpm_late_init(void *handle) { + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->pm.dpm_enabled) + return 0; + + ret = si_set_temperature_range(adev); + if (ret) + return ret; +#if 0 //TODO ? + si_dpm_powergate_uvd(adev, true); +#endif return 0; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c index f2a55c1413f5..17882f8dfdd3 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c @@ -200,7 +200,7 @@ static int get_platform_power_management_table( struct pp_hwmgr *hwmgr, ATOM_Tonga_PPM_Table *atom_ppm_table) { - struct phm_ppm_table *ptr = kzalloc(sizeof(ATOM_Tonga_PPM_Table), GFP_KERNEL); + struct phm_ppm_table *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); struct phm_ppt_v1_information *pp_table_information = (struct phm_ppt_v1_information *)(hwmgr->pptable); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 47ff3694ffa5..1d0693dad818 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -24,6 +24,7 @@ #include #include +#include #include #include "amdgpu.h" @@ -731,16 +732,8 @@ static int smu_late_init(void *handle) * handle the switch automatically. Driver involvement * is unnecessary. */ - if (!smu->dc_controlled_by_gpio) { - ret = smu_set_power_source(smu, - adev->pm.ac_power ? SMU_POWER_SOURCE_AC : - SMU_POWER_SOURCE_DC); - if (ret) { - dev_err(adev->dev, "Failed to switch to %s mode!\n", - adev->pm.ac_power ? "AC" : "DC"); - return ret; - } - } + adev->pm.ac_power = power_supply_is_system_supplied() > 0; + smu_set_ac_dc(smu); if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 1)) || (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 3))) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index d490b571c8ff..2e061a74a0b7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1467,10 +1467,12 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, case 0x3: dev_dbg(adev->dev, "Switched to AC mode!\n"); schedule_work(&smu->interrupt_work); + adev->pm.ac_power = true; break; case 0x4: dev_dbg(adev->dev, "Switched to DC mode!\n"); schedule_work(&smu->interrupt_work); + adev->pm.ac_power = false; break; case 0x7: /* diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 1b0fb93539ec..f3257cf4b06f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1415,10 +1415,12 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, case 0x3: dev_dbg(adev->dev, "Switched to AC mode!\n"); smu_v13_0_ack_ac_dc_interrupt(smu); + adev->pm.ac_power = true; break; case 0x4: dev_dbg(adev->dev, "Switched to DC mode!\n"); smu_v13_0_ack_ac_dc_interrupt(smu); + adev->pm.ac_power = false; break; case 0x7: /* diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index cf86cc05b7fc..77a304ac4d75 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -1300,10 +1300,32 @@ static void anx7625_config(struct anx7625_data *ctx) XTAL_FRQ_SEL, XTAL_FRQ_27M); } +static int anx7625_hpd_timer_config(struct anx7625_data *ctx) +{ + int ret; + + /* Set irq detect window to 2ms */ + ret = anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, + HPD_DET_TIMER_BIT0_7, HPD_TIME & 0xFF); + ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, + HPD_DET_TIMER_BIT8_15, + (HPD_TIME >> 8) & 0xFF); + ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, + HPD_DET_TIMER_BIT16_23, + (HPD_TIME >> 16) & 0xFF); + + return ret; +} + +static int anx7625_read_hpd_gpio_config_status(struct anx7625_data *ctx) +{ + return anx7625_reg_read(ctx, ctx->i2c.rx_p0_client, GPIO_CTRL_2); +} + static void anx7625_disable_pd_protocol(struct anx7625_data *ctx) { struct device *dev = &ctx->client->dev; - int ret; + int ret, val; /* Reset main ocm */ ret = anx7625_reg_write(ctx, ctx->i2c.rx_p0_client, 0x88, 0x40); @@ -1317,6 +1339,19 @@ static void anx7625_disable_pd_protocol(struct anx7625_data *ctx) DRM_DEV_DEBUG_DRIVER(dev, "disable PD feature fail.\n"); else DRM_DEV_DEBUG_DRIVER(dev, "disable PD feature succeeded.\n"); + + /* + * Make sure the HPD GPIO already be configured after OCM release before + * setting HPD detect window register. Here we poll the status register + * at maximum 40ms, then config HPD irq detect window register + */ + readx_poll_timeout(anx7625_read_hpd_gpio_config_status, + ctx, val, + ((val & HPD_SOURCE) || (val < 0)), + 2000, 2000 * 20); + + /* Set HPD irq detect window to 2ms */ + anx7625_hpd_timer_config(ctx); } static int anx7625_ocm_loading_check(struct anx7625_data *ctx) @@ -1440,20 +1475,6 @@ static void anx7625_start_dp_work(struct anx7625_data *ctx) static int anx7625_read_hpd_status_p0(struct anx7625_data *ctx) { - int ret; - - /* Set irq detect window to 2ms */ - ret = anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, - HPD_DET_TIMER_BIT0_7, HPD_TIME & 0xFF); - ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, - HPD_DET_TIMER_BIT8_15, - (HPD_TIME >> 8) & 0xFF); - ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, - HPD_DET_TIMER_BIT16_23, - (HPD_TIME >> 16) & 0xFF); - if (ret < 0) - return ret; - return anx7625_reg_read(ctx, ctx->i2c.rx_p0_client, SYSTEM_STSTUS); } @@ -1742,6 +1763,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux, u8 request = msg->request & ~DP_AUX_I2C_MOT; int ret = 0; + mutex_lock(&ctx->aux_lock); pm_runtime_get_sync(dev); msg->reply = 0; switch (request) { @@ -1758,6 +1780,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux, msg->size, msg->buffer); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + mutex_unlock(&ctx->aux_lock); return ret; } @@ -2454,7 +2477,9 @@ static void anx7625_bridge_atomic_disable(struct drm_bridge *bridge, ctx->connector = NULL; anx7625_dp_stop(ctx); - pm_runtime_put_sync(dev); + mutex_lock(&ctx->aux_lock); + pm_runtime_put_sync_suspend(dev); + mutex_unlock(&ctx->aux_lock); } static enum drm_connector_status @@ -2648,6 +2673,7 @@ static int anx7625_i2c_probe(struct i2c_client *client) mutex_init(&platform->lock); mutex_init(&platform->hdcp_wq_lock); + mutex_init(&platform->aux_lock); INIT_DELAYED_WORK(&platform->hdcp_work, hdcp_check_work_func); platform->hdcp_workqueue = create_workqueue("hdcp workqueue"); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h index 14f33d6be289..164250c8c8b6 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.h +++ b/drivers/gpu/drm/bridge/analogix/anx7625.h @@ -259,6 +259,10 @@ #define AP_MIPI_RX_EN BIT(5) /* 1: MIPI RX input in 0: no RX in */ #define AP_DISABLE_PD BIT(6) #define AP_DISABLE_DISPLAY BIT(7) + +#define GPIO_CTRL_2 0x49 +#define HPD_SOURCE BIT(6) + /***************************************************************/ /* Register definition of device address 0x84 */ #define MIPI_PHY_CONTROL_3 0x03 @@ -471,6 +475,8 @@ struct anx7625_data { struct workqueue_struct *hdcp_workqueue; /* Lock for hdcp work queue */ struct mutex hdcp_wq_lock; + /* Lock for aux transfer and disable */ + struct mutex aux_lock; char edid_block; struct display_timing dt; u8 display_timing_valid; diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c index 0851101a8c72..8686b20ac317 100644 --- a/drivers/gpu/drm/bridge/nxp-ptn3460.c +++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c @@ -54,13 +54,13 @@ static int ptn3460_read_bytes(struct ptn3460_bridge *ptn_bridge, char addr, int ret; ret = i2c_master_send(ptn_bridge->client, &addr, 1); - if (ret <= 0) { + if (ret < 0) { DRM_ERROR("Failed to send i2c command, ret=%d\n", ret); return ret; } ret = i2c_master_recv(ptn_bridge->client, buf, len); - if (ret <= 0) { + if (ret < 0) { DRM_ERROR("Failed to recv i2c data, ret=%d\n", ret); return ret; } @@ -78,7 +78,7 @@ static int ptn3460_write_byte(struct ptn3460_bridge *ptn_bridge, char addr, buf[1] = val; ret = i2c_master_send(ptn_bridge->client, buf, ARRAY_SIZE(buf)); - if (ret <= 0) { + if (ret < 0) { DRM_ERROR("Failed to send i2c command, ret=%d\n", ret); return ret; } diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 083337a27966..09737acc2cf4 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -106,6 +106,7 @@ struct ps8640 { struct device_link *link; bool pre_enabled; bool need_post_hpd_delay; + struct mutex aux_lock; }; static const struct regmap_config ps8640_regmap_config[] = { @@ -353,11 +354,20 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, struct device *dev = &ps_bridge->page[PAGE0_DP_CNTL]->dev; int ret; + mutex_lock(&ps_bridge->aux_lock); pm_runtime_get_sync(dev); + ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000); + if (ret) { + pm_runtime_put_sync_suspend(dev); + goto exit; + } ret = ps8640_aux_transfer_msg(aux, msg); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); +exit: + mutex_unlock(&ps_bridge->aux_lock); + return ret; } @@ -476,7 +486,18 @@ static void ps8640_post_disable(struct drm_bridge *bridge) ps_bridge->pre_enabled = false; ps8640_bridge_vdo_control(ps_bridge, DISABLE); + + /* + * The bridge seems to expect everything to be power cycled at the + * disable process, so grab a lock here to make sure + * ps8640_aux_transfer() is not holding a runtime PM reference and + * preventing the bridge from suspend. + */ + mutex_lock(&ps_bridge->aux_lock); + pm_runtime_put_sync_suspend(&ps_bridge->page[PAGE0_DP_CNTL]->dev); + + mutex_unlock(&ps_bridge->aux_lock); } static int ps8640_bridge_attach(struct drm_bridge *bridge, @@ -657,6 +678,8 @@ static int ps8640_probe(struct i2c_client *client) if (!ps_bridge) return -ENOMEM; + mutex_init(&ps_bridge->aux_lock); + ps_bridge->supplies[0].supply = "vdd12"; ps_bridge->supplies[1].supply = "vdd33"; ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ps_bridge->supplies), diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 3cb850db9e99..9c5c235090b2 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -181,7 +181,6 @@ struct sii902x { struct gpio_desc *reset_gpio; struct gpio_desc *enable_gpio; struct i2c_mux_core *i2cmux; - struct regulator_bulk_data supplies[2]; bool sink_is_hdmi; /* * Mutex protects audio and video functions from interfering @@ -1308,6 +1307,26 @@ static int sii902x_init(struct sii902x *sii902x) return ret; } + ret = sii902x_audio_codec_init(sii902x, dev); + if (ret) + return ret; + + i2c_set_clientdata(sii902x->i2c, sii902x); + + sii902x->i2cmux = i2c_mux_alloc(sii902x->i2c->adapter, dev, + 1, 0, I2C_MUX_GATE, + sii902x_i2c_bypass_select, + sii902x_i2c_bypass_deselect); + if (!sii902x->i2cmux) { + ret = -ENOMEM; + goto err_unreg_audio; + } + + sii902x->i2cmux->priv = sii902x; + ret = i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); + if (ret) + goto err_unreg_audio; + sii902x->bridge.funcs = &sii902x_bridge_funcs; sii902x->bridge.of_node = dev->of_node; sii902x->bridge.timings = &default_sii902x_timings; @@ -1318,19 +1337,13 @@ static int sii902x_init(struct sii902x *sii902x) drm_bridge_add(&sii902x->bridge); - sii902x_audio_codec_init(sii902x, dev); + return 0; - i2c_set_clientdata(sii902x->i2c, sii902x); +err_unreg_audio: + if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev)) + platform_device_unregister(sii902x->audio.pdev); - sii902x->i2cmux = i2c_mux_alloc(sii902x->i2c->adapter, dev, - 1, 0, I2C_MUX_GATE, - sii902x_i2c_bypass_select, - sii902x_i2c_bypass_deselect); - if (!sii902x->i2cmux) - return -ENOMEM; - - sii902x->i2cmux->priv = sii902x; - return i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); + return ret; } static int sii902x_probe(struct i2c_client *client, @@ -1339,6 +1352,7 @@ static int sii902x_probe(struct i2c_client *client, struct device *dev = &client->dev; struct device_node *endpoint; struct sii902x *sii902x; + static const char * const supplies[] = {"iovcc", "cvcc12"}; int ret; u32 val; @@ -1419,39 +1433,22 @@ static int sii902x_probe(struct i2c_client *client, mutex_init(&sii902x->mutex); - sii902x->supplies[0].supply = "iovcc"; - sii902x->supplies[1].supply = "cvcc12"; - ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(sii902x->supplies), - sii902x->supplies); + ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(supplies), supplies); if (ret < 0) - return ret; + return dev_err_probe(dev, ret, "Failed to enable supplies"); - ret = regulator_bulk_enable(ARRAY_SIZE(sii902x->supplies), - sii902x->supplies); - if (ret < 0) { - dev_err_probe(dev, ret, "Failed to enable supplies"); - return ret; - } - - ret = sii902x_init(sii902x); - if (ret < 0) { - dev_err(dev, "Failed to init sii902x %d\n", ret); - regulator_bulk_disable(ARRAY_SIZE(sii902x->supplies), - sii902x->supplies); - } - - return ret; + return sii902x_init(sii902x); } static void sii902x_remove(struct i2c_client *client) - { struct sii902x *sii902x = i2c_get_clientdata(client); - i2c_mux_del_adapters(sii902x->i2cmux); drm_bridge_remove(&sii902x->bridge); - regulator_bulk_disable(ARRAY_SIZE(sii902x->supplies), - sii902x->supplies); + i2c_mux_del_adapters(sii902x->i2cmux); + + if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev)) + platform_device_unregister(sii902x->audio.pdev); } static const struct of_device_id sii902x_dt_ids[] = { diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 7098f125b54a..fd32041f8226 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm, u64 start, u64 end, unsigned int order) { + u64 req_size = mm->chunk_size << order; struct drm_buddy_block *block; struct drm_buddy_block *buddy; LIST_HEAD(dfs); @@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm, if (drm_buddy_block_is_allocated(block)) continue; + if (block_start < start || block_end > end) { + u64 adjusted_start = max(block_start, start); + u64 adjusted_end = min(block_end, end); + + if (round_down(adjusted_end + 1, req_size) <= + round_up(adjusted_start, req_size)) + continue; + } + if (contains(start, end, block_start, block_end) && order == drm_buddy_block_order(block)) { /* diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index a8b4d918e9a3..d6a0572984b5 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -406,7 +406,7 @@ int drm_open(struct inode *inode, struct file *filp) { struct drm_device *dev; struct drm_minor *minor; - int retcode; + int retcode = 0; int need_setup = 0; minor = drm_minor_acquire(iminor(inode)); diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index 2dd97473ca10..72ad1715f8e7 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -570,7 +570,7 @@ int drm_mode_getfb2_ioctl(struct drm_device *dev, struct drm_mode_fb_cmd2 *r = data; struct drm_framebuffer *fb; unsigned int i; - int ret; + int ret = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 853208e8dd73..ef7ec68867df 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -346,7 +346,8 @@ static int mipi_dsi_remove_device_fn(struct device *dev, void *priv) { struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev); - mipi_dsi_detach(dsi); + if (dsi->attached) + mipi_dsi_detach(dsi); mipi_dsi_device_unregister(dsi); return 0; @@ -369,11 +370,18 @@ EXPORT_SYMBOL(mipi_dsi_host_unregister); int mipi_dsi_attach(struct mipi_dsi_device *dsi) { const struct mipi_dsi_host_ops *ops = dsi->host->ops; + int ret; if (!ops || !ops->attach) return -ENOSYS; - return ops->attach(dsi->host, dsi); + ret = ops->attach(dsi->host, dsi); + if (ret) + return ret; + + dsi->attached = true; + + return 0; } EXPORT_SYMBOL(mipi_dsi_attach); @@ -385,9 +393,14 @@ int mipi_dsi_detach(struct mipi_dsi_device *dsi) { const struct mipi_dsi_host_ops *ops = dsi->host->ops; + if (WARN_ON(!dsi->attached)) + return -EINVAL; + if (!ops || !ops->detach) return -ENOSYS; + dsi->attached = false; + return ops->detach(dsi->host, dsi); } EXPORT_SYMBOL(mipi_dsi_detach); diff --git a/drivers/gpu/drm/drm_panel.c b/drivers/gpu/drm/drm_panel.c index f634371c717a..7fd3de89ed07 100644 --- a/drivers/gpu/drm/drm_panel.c +++ b/drivers/gpu/drm/drm_panel.c @@ -207,19 +207,24 @@ EXPORT_SYMBOL(drm_panel_disable); * The modes probed from the panel are automatically added to the connector * that the panel is attached to. * - * Return: The number of modes available from the panel on success or a - * negative error code on failure. + * Return: The number of modes available from the panel on success, or 0 on + * failure (no modes). */ int drm_panel_get_modes(struct drm_panel *panel, struct drm_connector *connector) { if (!panel) - return -EINVAL; + return 0; - if (panel->funcs && panel->funcs->get_modes) - return panel->funcs->get_modes(panel, connector); + if (panel->funcs && panel->funcs->get_modes) { + int num; - return -EOPNOTSUPP; + num = panel->funcs->get_modes(panel, connector); + if (num > 0) + return num; + } + + return 0; } EXPORT_SYMBOL(drm_panel_get_modes); diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 33357629a7f5..7a4af43e2cd1 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -1382,6 +1382,7 @@ retry: out: if (fb) drm_framebuffer_put(fb); + fb = NULL; if (plane->old_fb) drm_framebuffer_put(plane->old_fb); plane->old_fb = NULL; diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index eb09e86044c6..68a6d4b0ead7 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -828,7 +828,7 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev, if (max_segment == 0) max_segment = UINT_MAX; err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0, - nr_pages << PAGE_SHIFT, + (unsigned long)nr_pages << PAGE_SHIFT, max_segment, GFP_KERNEL); if (err) { kfree(sg); diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 3b968ad187cf..52dbaf74fe16 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -362,6 +362,13 @@ static int drm_helper_probe_get_modes(struct drm_connector *connector) count = connector_funcs->get_modes(connector); + /* The .get_modes() callback should not return negative values. */ + if (count < 0) { + drm_err(connector->dev, ".get_modes() returned %pe\n", + ERR_PTR(count)); + count = 0; + } + /* * Fallback for when DDC probe failed in drm_get_edid() and thus skipped * override/firmware EDID. diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index da0145bc104a..8f2737075dc2 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -980,7 +980,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, uint64_t *points; uint32_t signaled_count, i; - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) lockdep_assert_none_held_once(); points = kmalloc_array(count, sizeof(*points), GFP_KERNEL); @@ -1049,7 +1050,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, * fallthough and try a 0 timeout wait! */ - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) { for (i = 0; i < count; ++i) drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]); } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 1d2b4fb4bcf8..f29952a55c05 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -488,7 +488,7 @@ static const struct drm_driver etnaviv_drm_driver = { .desc = "etnaviv DRM", .date = "20151214", .major = 1, - .minor = 3, + .minor = 4, }; /* diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c index f2fc645c7956..212e7050c4ba 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c @@ -135,6 +135,9 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu) { struct etnaviv_chip_identity *ident = &gpu->identity; + const u32 product_id = ident->product_id; + const u32 customer_id = ident->customer_id; + const u32 eco_id = ident->eco_id; int i; for (i = 0; i < ARRAY_SIZE(etnaviv_chip_identities); i++) { @@ -148,6 +151,12 @@ bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu) etnaviv_chip_identities[i].eco_id == ~0U)) { memcpy(ident, &etnaviv_chip_identities[i], sizeof(*ident)); + + /* Restore some id values as ~0U aka 'don't care' might been used. */ + ident->product_id = product_id; + ident->customer_id = customer_id; + ident->eco_id = eco_id; + return true; } } diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 8155d7e650f1..4bc5d9470acd 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -319,9 +319,9 @@ static void decon_win_set_bldmod(struct decon_context *ctx, unsigned int win, static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win, struct drm_framebuffer *fb) { - struct exynos_drm_plane plane = ctx->planes[win]; + struct exynos_drm_plane *plane = &ctx->planes[win]; struct exynos_drm_plane_state *state = - to_exynos_plane_state(plane.base.state); + to_exynos_plane_state(plane->base.state); unsigned int alpha = state->base.alpha; unsigned int pixel_alpha; unsigned long val; diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 16c539657f73..4095b0d3ac2e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -309,6 +309,7 @@ err_mode_config_cleanup: drm_mode_config_cleanup(drm); exynos_drm_cleanup_dma(drm); kfree(private); + dev_set_drvdata(dev, NULL); err_free_drm: drm_dev_put(drm); @@ -323,6 +324,7 @@ static void exynos_drm_unbind(struct device *dev) exynos_drm_fbdev_fini(drm); drm_kms_helper_poll_fini(drm); + drm_atomic_helper_shutdown(drm); component_unbind_all(drm->dev, drm); drm_mode_config_cleanup(drm); @@ -360,9 +362,18 @@ static int exynos_drm_platform_remove(struct platform_device *pdev) return 0; } +static void exynos_drm_platform_shutdown(struct platform_device *pdev) +{ + struct drm_device *drm = platform_get_drvdata(pdev); + + if (drm) + drm_atomic_helper_shutdown(drm); +} + static struct platform_driver exynos_drm_platform_driver = { .probe = exynos_drm_platform_probe, .remove = exynos_drm_platform_remove, + .shutdown = exynos_drm_platform_shutdown, .driver = { .name = "exynos-drm", .pm = &exynos_drm_pm_ops, diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index ae6636e6658e..529033b980b2 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -662,9 +662,9 @@ static void fimd_win_set_bldmod(struct fimd_context *ctx, unsigned int win, static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, struct drm_framebuffer *fb, int width) { - struct exynos_drm_plane plane = ctx->planes[win]; + struct exynos_drm_plane *plane = &ctx->planes[win]; struct exynos_drm_plane_state *state = - to_exynos_plane_state(plane.base.state); + to_exynos_plane_state(plane->base.state); uint32_t pixel_format = fb->format->format; unsigned int alpha = state->base.alpha; u32 val = WINCONx_ENWIN; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 964dceb28c1e..68ea92742b06 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1342,7 +1342,7 @@ static int __maybe_unused gsc_runtime_resume(struct device *dev) for (i = 0; i < ctx->num_clocks; i++) { ret = clk_prepare_enable(ctx->clocks[i]); if (ret) { - while (--i > 0) + while (--i >= 0) clk_disable_unprepare(ctx->clocks[i]); return ret; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index f5e1adfcaa51..fb941a8c99f0 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -316,14 +316,14 @@ static int vidi_get_modes(struct drm_connector *connector) */ if (!ctx->raw_edid) { DRM_DEV_DEBUG_KMS(ctx->dev, "raw_edid is null.\n"); - return -EFAULT; + return 0; } edid_len = (1 + ctx->raw_edid->extensions) * EDID_LENGTH; edid = kmemdup(ctx->raw_edid, edid_len, GFP_KERNEL); if (!edid) { DRM_DEV_DEBUG_KMS(ctx->dev, "failed to allocate edid\n"); - return -ENOMEM; + return 0; } drm_connector_update_edid_property(connector, edid); diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 1a7194a653ae..be2d9cbaaef2 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -887,11 +887,11 @@ static int hdmi_get_modes(struct drm_connector *connector) int ret; if (!hdata->ddc_adpt) - return -ENODEV; + return 0; edid = drm_get_edid(connector, hdata->ddc_adpt); if (!edid) - return -ENODEV; + return 0; hdata->dvi_mode = !connector->display_info.is_hdmi; DRM_DEV_DEBUG_KMS(hdata->dev, "%s : width[%d] x height[%d]\n", diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index a70b7061742a..9cc1ef2ca72c 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -3413,6 +3413,9 @@ static bool intel_bios_encoder_supports_dp_dual_mode(const struct intel_bios_enc { const struct child_device_config *child = &devdata->child; + if (!devdata) + return false; + if (!intel_bios_encoder_supports_dp(devdata) || !intel_bios_encoder_supports_hdmi(devdata)) return false; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index ba14b18d65f3..2e7c52c2e47d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -378,6 +378,9 @@ i915_gem_userptr_release(struct drm_i915_gem_object *obj) { GEM_WARN_ON(obj->userptr.page_ref); + if (!obj->userptr.notifier.mm) + return; + mmu_interval_notifier_remove(&obj->userptr.notifier); obj->userptr.notifier.mm = NULL; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index b0a4a2dbe3ee..feb0fc32a19a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -253,9 +253,6 @@ static int __engine_park(struct intel_wakeref *wf) intel_engine_park_heartbeat(engine); intel_breadcrumbs_park(engine->breadcrumbs); - /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN); - if (engine->park) engine->park(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index f903ee1ce06e..eae138b9f2df 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3270,6 +3270,9 @@ static void execlists_park(struct intel_engine_cs *engine) { cancel_timer(&engine->execlists.timer); cancel_timer(&engine->execlists.preempt); + + /* Reset upon idling, or we may delay the busy wakeup. */ + WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); } static void add_to_engine(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index daac2050d77d..6f531bb61f7e 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2844,8 +2844,7 @@ static int handle_mmio(struct intel_gvt_mmio_table_iter *iter, u32 offset, for (i = start; i < end; i += 4) { p = intel_gvt_find_mmio_info(gvt, i); if (p) { - WARN(1, "dup mmio definition offset %x\n", - info->offset); + WARN(1, "dup mmio definition offset %x\n", i); /* We return -EEXIST here to make GVT-g load fail. * So duplicated MMIO can be found as soon as diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index 06723b2e9b84..64b6bc2de873 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -72,14 +72,14 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector) int ret; if (!mode) - return -EINVAL; + return 0; ret = of_get_drm_display_mode(np, &imxpd->mode, &imxpd->bus_flags, OF_USE_NATIVE_MODE); if (ret) { drm_mode_destroy(connector->dev, mode); - return ret; + return 0; } drm_mode_copy(mode, &imxpd->mode); diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c index 0f1ca0b0db49..d72c5bf4e5ac 100644 --- a/drivers/gpu/drm/lima/lima_gem.c +++ b/drivers/gpu/drm/lima/lima_gem.c @@ -75,29 +75,34 @@ int lima_heap_alloc(struct lima_bo *bo, struct lima_vm *vm) } else { bo->base.sgt = kmalloc(sizeof(*bo->base.sgt), GFP_KERNEL); if (!bo->base.sgt) { - sg_free_table(&sgt); - return -ENOMEM; + ret = -ENOMEM; + goto err_out0; } } ret = dma_map_sgtable(dev, &sgt, DMA_BIDIRECTIONAL, 0); - if (ret) { - sg_free_table(&sgt); - kfree(bo->base.sgt); - bo->base.sgt = NULL; - return ret; - } + if (ret) + goto err_out1; *bo->base.sgt = sgt; if (vm) { ret = lima_vm_map_bo(vm, bo, old_size >> PAGE_SHIFT); if (ret) - return ret; + goto err_out2; } bo->heap_size = new_size; return 0; + +err_out2: + dma_unmap_sgtable(dev, &sgt, DMA_BIDIRECTIONAL, 0); +err_out1: + kfree(bo->base.sgt); + bo->base.sgt = NULL; +err_out0: + sg_free_table(&sgt); + return ret; } int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 558000db4a10..beaaf44004cf 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -91,11 +91,13 @@ static void mtk_drm_crtc_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) struct drm_crtc *crtc = &mtk_crtc->base; unsigned long flags; - spin_lock_irqsave(&crtc->dev->event_lock, flags); - drm_crtc_send_vblank_event(crtc, mtk_crtc->event); - drm_crtc_vblank_put(crtc); - mtk_crtc->event = NULL; - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + if (mtk_crtc->event) { + spin_lock_irqsave(&crtc->dev->event_lock, flags); + drm_crtc_send_vblank_event(crtc, mtk_crtc->event); + drm_crtc_vblank_put(crtc); + mtk_crtc->event = NULL; + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + } } static void mtk_drm_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 3e74c7c1b89f..d871b1dba083 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -70,8 +70,8 @@ #define DSI_PS_WC 0x3fff #define DSI_PS_SEL (3 << 16) #define PACKED_PS_16BIT_RGB565 (0 << 16) -#define LOOSELY_PS_18BIT_RGB666 (1 << 16) -#define PACKED_PS_18BIT_RGB666 (2 << 16) +#define PACKED_PS_18BIT_RGB666 (1 << 16) +#define LOOSELY_PS_24BIT_RGB666 (2 << 16) #define PACKED_PS_24BIT_RGB888 (3 << 16) #define DSI_VSA_NL 0x20 @@ -366,10 +366,10 @@ static void mtk_dsi_ps_control_vact(struct mtk_dsi *dsi) ps_bpp_mode |= PACKED_PS_24BIT_RGB888; break; case MIPI_DSI_FMT_RGB666: - ps_bpp_mode |= PACKED_PS_18BIT_RGB666; + ps_bpp_mode |= LOOSELY_PS_24BIT_RGB666; break; case MIPI_DSI_FMT_RGB666_PACKED: - ps_bpp_mode |= LOOSELY_PS_18BIT_RGB666; + ps_bpp_mode |= PACKED_PS_18BIT_RGB666; break; case MIPI_DSI_FMT_RGB565: ps_bpp_mode |= PACKED_PS_16BIT_RGB565; @@ -423,7 +423,7 @@ static void mtk_dsi_ps_control(struct mtk_dsi *dsi) dsi_tmp_buf_bpp = 3; break; case MIPI_DSI_FMT_RGB666: - tmp_reg = LOOSELY_PS_18BIT_RGB666; + tmp_reg = LOOSELY_PS_24BIT_RGB666; dsi_tmp_buf_bpp = 3; break; case MIPI_DSI_FMT_RGB666_PACKED: diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 119544d88b58..fbac39aa38cc 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -316,32 +316,34 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) goto exit_afbcd; if (has_components) { - ret = component_bind_all(drm->dev, drm); + ret = component_bind_all(dev, drm); if (ret) { dev_err(drm->dev, "Couldn't bind all components\n"); + /* Do not try to unbind */ + has_components = false; goto exit_afbcd; } } ret = meson_encoder_hdmi_init(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = meson_plane_create(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = meson_overlay_create(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = meson_crtc_create(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm); if (ret) - goto unbind_all; + goto exit_afbcd; drm_mode_config_reset(drm); @@ -359,15 +361,18 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) uninstall_irq: free_irq(priv->vsync_irq, drm); -unbind_all: - if (has_components) - component_unbind_all(drm->dev, drm); exit_afbcd: if (priv->afbcd.ops) priv->afbcd.ops->exit(priv); free_drm: drm_dev_put(drm); + meson_encoder_hdmi_remove(priv); + meson_encoder_cvbs_remove(priv); + + if (has_components) + component_unbind_all(dev, drm); + return ret; } diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c index 3f73b211fa8e..3407450435e2 100644 --- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c +++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c @@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_CVBS]) { meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS]; drm_bridge_remove(&meson_encoder_cvbs->bridge); - drm_bridge_remove(meson_encoder_cvbs->next_bridge); } } diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index b14e6e507c61..03062e7a02b6 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -472,6 +472,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_HDMI]) { meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI]; drm_bridge_remove(&meson_encoder_hdmi->bridge); - drm_bridge_remove(meson_encoder_hdmi->next_bridge); } } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index b0eb881f8af1..3632f0768aa9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -38,6 +38,9 @@ #define DPU_ERROR_ENC(e, fmt, ...) DPU_ERROR("enc%d " fmt,\ (e) ? (e)->base.base.id : -1, ##__VA_ARGS__) +#define DPU_ERROR_ENC_RATELIMITED(e, fmt, ...) DPU_ERROR_RATELIMITED("enc%d " fmt,\ + (e) ? (e)->base.base.id : -1, ##__VA_ARGS__) + /* * Two to anticipate panels that can do cmd/vid dynamic switching * plan is to create all possible physical encoder types, and switch between @@ -225,6 +228,13 @@ bool dpu_encoder_is_widebus_enabled(const struct drm_encoder *drm_enc) return dpu_enc->wide_bus_en; } +bool dpu_encoder_is_dsc_enabled(const struct drm_encoder *drm_enc) +{ + const struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc); + + return dpu_enc->dsc ? true : false; +} + int dpu_encoder_get_crc_values_cnt(const struct drm_encoder *drm_enc) { struct dpu_encoder_virt *dpu_enc; @@ -1861,7 +1871,9 @@ static void dpu_encoder_prep_dsc(struct dpu_encoder_virt *dpu_enc, dsc_common_mode = 0; pic_width = dsc->pic_width; - dsc_common_mode = DSC_MODE_MULTIPLEX | DSC_MODE_SPLIT_PANEL; + dsc_common_mode = DSC_MODE_SPLIT_PANEL; + if (dpu_encoder_use_dsc_merge(enc_master->parent)) + dsc_common_mode |= DSC_MODE_MULTIPLEX; if (enc_master->intf_mode == INTF_MODE_VIDEO) dsc_common_mode |= DSC_MODE_VIDEO; @@ -2050,7 +2062,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) } /* reset the merge 3D HW block */ - if (phys_enc->hw_pp->merge_3d) { + if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) { phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d, BLEND_3D_NONE); if (phys_enc->hw_ctl->ops.update_pending_flush_merge_3d) @@ -2066,7 +2078,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) if (phys_enc->hw_wb) intf_cfg.wb = phys_enc->hw_wb->idx; - if (phys_enc->hw_pp->merge_3d) + if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) intf_cfg.merge_3d = phys_enc->hw_pp->merge_3d->idx; if (ctl->ops.reset_intf_cfg) @@ -2385,7 +2397,7 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t) return; } - DPU_ERROR_ENC(dpu_enc, "frame done timeout\n"); + DPU_ERROR_ENC_RATELIMITED(dpu_enc, "frame done timeout\n"); event = DPU_ENCODER_FRAME_EVENT_ERROR; trace_dpu_enc_frame_done_timeout(DRMID(drm_enc), event); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h index 9e7236ef34e6..a71efa2b9e50 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h @@ -175,6 +175,13 @@ int dpu_encoder_get_vsync_count(struct drm_encoder *drm_enc); bool dpu_encoder_is_widebus_enabled(const struct drm_encoder *drm_enc); +/** + * dpu_encoder_is_dsc_enabled - indicate whether dsc is enabled + * for the encoder. + * @drm_enc: Pointer to previously created drm encoder structure + */ +bool dpu_encoder_is_dsc_enabled(const struct drm_encoder *drm_enc); + /** * dpu_encoder_get_crc_values_cnt - get number of physical encoders contained * in virtual encoder that can collect CRC values diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index 2c14646661b7..2baade1cd487 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -100,6 +100,7 @@ static void drm_mode_to_intf_timing_params( } timing->wide_bus_en = dpu_encoder_is_widebus_enabled(phys_enc->parent); + timing->compression_en = dpu_encoder_is_dsc_enabled(phys_enc->parent); /* * for DP, divide the horizonal parameters by 2 when @@ -256,12 +257,14 @@ static void dpu_encoder_phys_vid_setup_timing_engine( mode.htotal >>= 1; mode.hsync_start >>= 1; mode.hsync_end >>= 1; + mode.hskew >>= 1; DPU_DEBUG_VIDENC(phys_enc, - "split_role %d, halve horizontal %d %d %d %d\n", + "split_role %d, halve horizontal %d %d %d %d %d\n", phys_enc->split_role, mode.hdisplay, mode.htotal, - mode.hsync_start, mode.hsync_end); + mode.hsync_start, mode.hsync_end, + mode.hskew); } drm_mode_to_intf_timing_params(phys_enc, &mode, &timing_params); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c index 384558d2f960..1debac4fcc3e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c @@ -154,13 +154,8 @@ static void dpu_hw_intf_setup_timing_engine(struct dpu_hw_intf *ctx, hsync_ctl = (hsync_period << 16) | p->hsync_pulse_width; display_hctl = (hsync_end_x << 16) | hsync_start_x; - /* - * DATA_HCTL_EN controls data timing which can be different from - * video timing. It is recommended to enable it for all cases, except - * if compression is enabled in 1 pixel per clock mode - */ if (p->wide_bus_en) - intf_cfg2 |= INTF_CFG2_DATABUS_WIDEN | INTF_CFG2_DATA_HCTL_EN; + intf_cfg2 |= INTF_CFG2_DATABUS_WIDEN; data_width = p->width; @@ -230,6 +225,14 @@ static void dpu_hw_intf_setup_timing_engine(struct dpu_hw_intf *ctx, DPU_REG_WRITE(c, INTF_CONFIG, intf_cfg); DPU_REG_WRITE(c, INTF_PANEL_FORMAT, panel_format); if (ctx->cap->features & BIT(DPU_DATA_HCTL_EN)) { + /* + * DATA_HCTL_EN controls data timing which can be different from + * video timing. It is recommended to enable it for all cases, except + * if compression is enabled in 1 pixel per clock mode + */ + if (!(p->compression_en && !p->wide_bus_en)) + intf_cfg2 |= INTF_CFG2_DATA_HCTL_EN; + DPU_REG_WRITE(c, INTF_CONFIG2, intf_cfg2); DPU_REG_WRITE(c, INTF_DISPLAY_DATA_HCTL, display_data_hctl); DPU_REG_WRITE(c, INTF_ACTIVE_DATA_HCTL, active_data_hctl); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h index e75339b96a1d..7f502c8bee1d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h @@ -33,6 +33,7 @@ struct intf_timing_params { u32 hsync_skew; bool wide_bus_en; + bool compression_en; }; struct intf_prog_fetch { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c index a3e413d27717..63dc2ee446d4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c @@ -105,6 +105,9 @@ static void dpu_hw_wb_setup_format(struct dpu_hw_wb *ctx, dst_format |= BIT(14); /* DST_ALPHA_X */ } + if (DPU_FORMAT_IS_YUV(fmt)) + dst_format |= BIT(15); + pattern = (fmt->element[3] << 24) | (fmt->element[2] << 16) | (fmt->element[1] << 8) | diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index ed80ed6784ee..bb35aa5f5709 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -51,6 +51,7 @@ } while (0) #define DPU_ERROR(fmt, ...) pr_err("[dpu error]" fmt, ##__VA_ARGS__) +#define DPU_ERROR_RATELIMITED(fmt, ...) pr_err_ratelimited("[dpu error]" fmt, ##__VA_ARGS__) /** * ktime_compare_safe - compare two ktime structures diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 103eef9f059a..b20701893e5b 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -133,11 +133,6 @@ static void dp_ctrl_config_ctrl(struct dp_ctrl_private *ctrl) tbd = dp_link_get_test_bits_depth(ctrl->link, ctrl->panel->dp_mode.bpp); - if (tbd == DP_TEST_BIT_DEPTH_UNKNOWN) { - pr_debug("BIT_DEPTH not set. Configure default\n"); - tbd = DP_TEST_BIT_DEPTH_8; - } - config |= tbd << DP_CONFIGURATION_CTRL_BPC_SHIFT; /* Num of Lanes */ diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index cb66d1126ea9..ceb382fa56d5 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -7,6 +7,7 @@ #include +#include "dp_reg.h" #include "dp_link.h" #include "dp_panel.h" @@ -1075,7 +1076,7 @@ int dp_link_process_request(struct dp_link *dp_link) int dp_link_get_colorimetry_config(struct dp_link *dp_link) { - u32 cc; + u32 cc = DP_MISC0_COLORIMERY_CFG_LEGACY_RGB; struct dp_link_private *link; if (!dp_link) { @@ -1089,10 +1090,11 @@ int dp_link_get_colorimetry_config(struct dp_link *dp_link) * Unless a video pattern CTS test is ongoing, use RGB_VESA * Only RGB_VESA and RGB_CEA supported for now */ - if (dp_link_is_video_pattern_requested(link)) - cc = link->dp_link.test_video.test_dyn_range; - else - cc = DP_TEST_DYNAMIC_RANGE_VESA; + if (dp_link_is_video_pattern_requested(link)) { + if (link->dp_link.test_video.test_dyn_range & + DP_TEST_DYNAMIC_RANGE_CEA) + cc = DP_MISC0_COLORIMERY_CFG_CEA_RGB; + } return cc; } @@ -1172,6 +1174,9 @@ void dp_link_reset_phy_params_vx_px(struct dp_link *dp_link) u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp) { u32 tbd; + struct dp_link_private *link; + + link = container_of(dp_link, struct dp_link_private, dp_link); /* * Few simplistic rules and assumptions made here: @@ -1189,12 +1194,13 @@ u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp) tbd = DP_TEST_BIT_DEPTH_10; break; default: - tbd = DP_TEST_BIT_DEPTH_UNKNOWN; + drm_dbg_dp(link->drm_dev, "bpp=%d not supported, use bpc=8\n", + bpp); + tbd = DP_TEST_BIT_DEPTH_8; break; } - if (tbd != DP_TEST_BIT_DEPTH_UNKNOWN) - tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT); + tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT); return tbd; } diff --git a/drivers/gpu/drm/msm/dp/dp_reg.h b/drivers/gpu/drm/msm/dp/dp_reg.h index 268602803d9a..176a503ece9c 100644 --- a/drivers/gpu/drm/msm/dp/dp_reg.h +++ b/drivers/gpu/drm/msm/dp/dp_reg.h @@ -129,6 +129,9 @@ #define DP_MISC0_COLORIMETRY_CFG_SHIFT (0x00000001) #define DP_MISC0_TEST_BITS_DEPTH_SHIFT (0x00000005) +#define DP_MISC0_COLORIMERY_CFG_LEGACY_RGB (0) +#define DP_MISC0_COLORIMERY_CFG_CEA_RGB (0x04) + #define REG_DP_VALID_BOUNDARY (0x00000030) #define REG_DP_VALID_BOUNDARY_2 (0x00000034) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 62bc3756f2e2..c0bcf020ef66 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -673,6 +673,10 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(phy->ahb_clk), "Unable to get ahb clk\n"); + ret = devm_pm_runtime_enable(&pdev->dev); + if (ret) + return ret; + /* PLL init will call into clk_register which requires * register access, so we need to enable power and ahb clock. */ diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index d12ba47b37c4..0de3612135e9 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -21,6 +21,8 @@ struct msm_iommu_pagetable { struct msm_mmu base; struct msm_mmu *parent; struct io_pgtable_ops *pgtbl_ops; + const struct iommu_flush_ops *tlb; + struct device *iommu_dev; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ phys_addr_t ttbr; u32 asid; @@ -194,11 +196,33 @@ static const struct msm_mmu_funcs pagetable_funcs = { static void msm_iommu_tlb_flush_all(void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_all((void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_walk(iova, size, granule, (void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, @@ -206,7 +230,7 @@ static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, { } -static const struct iommu_flush_ops null_tlb_ops = { +static const struct iommu_flush_ops tlb_ops = { .tlb_flush_all = msm_iommu_tlb_flush_all, .tlb_flush_walk = msm_iommu_tlb_flush_walk, .tlb_add_page = msm_iommu_tlb_add_page, @@ -254,10 +278,10 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* The incoming cfg will have the TTBR1 quirk enabled */ ttbr0_cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; - ttbr0_cfg.tlb = &null_tlb_ops; + ttbr0_cfg.tlb = &tlb_ops; pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, - &ttbr0_cfg, iommu->domain); + &ttbr0_cfg, pagetable); if (!pagetable->pgtbl_ops) { kfree(pagetable); @@ -282,6 +306,8 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* Needed later for TLB flush */ pagetable->parent = parent; + pagetable->tlb = ttbr1_cfg->tlb; + pagetable->iommu_dev = ttbr1_cfg->iommu_dev; pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap; pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 126b3c6e12f9..f2dca41e46c5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1194,6 +1194,8 @@ out: drm_vma_node_unmap(&nvbo->bo.base.vma_node, bdev->dev_mapping); nouveau_ttm_io_mem_free_locked(drm, nvbo->bo.resource); + nvbo->bo.resource->bus.offset = 0; + nvbo->bo.resource->bus.addr = NULL; goto retry; } diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 20fe53815b20..6ca4a46a82ee 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -379,9 +379,9 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) dma_addr_t *dma_addrs; struct nouveau_fence *fence; - src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL); - dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL); - dma_addrs = kcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL); + src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL); + dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL); + dma_addrs = kvcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL | __GFP_NOFAIL); migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT, npages); @@ -407,11 +407,11 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) migrate_device_pages(src_pfns, dst_pfns, npages); nouveau_dmem_fence_done(&fence); migrate_device_finalize(src_pfns, dst_pfns, npages); - kfree(src_pfns); - kfree(dst_pfns); + kvfree(src_pfns); + kvfree(dst_pfns); for (i = 0; i < npages; i++) dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL); - kfree(dma_addrs); + kvfree(dma_addrs); } void diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 31a5b81ee9fc..be6674fb1af7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -997,7 +997,7 @@ nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id) if (ret) return ret; - buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL); + buffer->fault = kvcalloc(buffer->entries, sizeof(*buffer->fault), GFP_KERNEL); if (!buffer->fault) return -ENOMEM; diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index 67d6619fcd5e..29b5dedf6db8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -108,6 +108,9 @@ nouveau_vma_new(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm, } else { ret = nvif_vmm_get(&vmm->vmm, PTES, false, mem->mem.page, 0, mem->mem.size, &tmp); + if (ret) + goto done; + vma->addr = tmp.addr; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index 19188683c8fc..8c2bf1c16f2a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -154,11 +154,17 @@ shadow_fw_init(struct nvkm_bios *bios, const char *name) return (void *)fw; } +static void +shadow_fw_release(void *fw) +{ + release_firmware(fw); +} + static const struct nvbios_source shadow_fw = { .name = "firmware", .init = shadow_fw_init, - .fini = (void(*)(void *))release_firmware, + .fini = shadow_fw_release, .read = shadow_fw_read, .rw = false, }; diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index a163585a2a52..bfcddd4aa932 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -203,6 +203,9 @@ struct edp_panel_entry { /** @name: Name of this panel (for printing to logs). */ const char *name; + + /** @override_edid_mode: Override the mode obtained by edid. */ + const struct drm_display_mode *override_edid_mode; }; struct panel_edp { @@ -301,6 +304,24 @@ static unsigned int panel_edp_get_display_modes(struct panel_edp *panel, return num; } +static int panel_edp_override_edid_mode(struct panel_edp *panel, + struct drm_connector *connector, + const struct drm_display_mode *override_mode) +{ + struct drm_display_mode *mode; + + mode = drm_mode_duplicate(connector->dev, override_mode); + if (!mode) { + dev_err(panel->base.dev, "failed to add additional mode\n"); + return 0; + } + + mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; + drm_mode_set_name(mode); + drm_mode_probed_add(connector, mode); + return 1; +} + static int panel_edp_get_non_edid_modes(struct panel_edp *panel, struct drm_connector *connector) { @@ -392,8 +413,7 @@ static int panel_edp_unprepare(struct drm_panel *panel) if (!p->prepared) return 0; - pm_runtime_mark_last_busy(panel->dev); - ret = pm_runtime_put_autosuspend(panel->dev); + ret = pm_runtime_put_sync_suspend(panel->dev); if (ret < 0) return ret; p->prepared = false; @@ -568,6 +588,9 @@ static int panel_edp_get_modes(struct drm_panel *panel, { struct panel_edp *p = to_panel_edp(panel); int num = 0; + bool has_override_edid_mode = p->detected_panel && + p->detected_panel != ERR_PTR(-EINVAL) && + p->detected_panel->override_edid_mode; /* probe EDID if a DDC bus is available */ if (p->ddc) { @@ -575,9 +598,18 @@ static int panel_edp_get_modes(struct drm_panel *panel, if (!p->edid) p->edid = drm_get_edid(connector, p->ddc); - - if (p->edid) - num += drm_add_edid_modes(connector, p->edid); + if (p->edid) { + if (has_override_edid_mode) { + /* + * override_edid_mode is specified. Use + * override_edid_mode instead of from edid. + */ + num += panel_edp_override_edid_mode(p, connector, + p->detected_panel->override_edid_mode); + } else { + num += drm_add_edid_modes(connector, p->edid); + } + } pm_runtime_mark_last_busy(panel->dev); pm_runtime_put_autosuspend(panel->dev); @@ -975,6 +1007,8 @@ static const struct panel_desc auo_b116xak01 = { }, .delay = { .hpd_absent = 200, + .unprepare = 500, + .enable = 50, }, }; @@ -1859,6 +1893,15 @@ static const struct panel_delay delay_200_500_e200 = { .delay = _delay \ } +#define EDP_PANEL_ENTRY2(vend_chr_0, vend_chr_1, vend_chr_2, product_id, _delay, _name, _mode) \ +{ \ + .name = _name, \ + .panel_id = drm_edid_encode_panel_id(vend_chr_0, vend_chr_1, vend_chr_2, \ + product_id), \ + .delay = _delay, \ + .override_edid_mode = _mode \ +} + /* * This table is used to figure out power sequencing delays for panels that * are detected by EDID. Entries here may point to entries in the @@ -1870,7 +1913,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('A', 'U', 'O', 0x1062, &delay_200_500_e50, "B120XAN01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x1e9b, &delay_200_500_e50, "B133UAN02.1"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x1ea5, &delay_200_500_e50, "B116XAK01.6"), - EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x8594, &delay_200_500_e50, "B133UAN01.0"), diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index a6bad7c63048..a492f10fa8db 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -3920,6 +3920,7 @@ static const struct panel_desc tianma_tm070jdhg30 = { }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, .connector_type = DRM_MODE_CONNECTOR_LVDS, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; static const struct panel_desc tianma_tm070jvhg33 = { @@ -3932,6 +3933,7 @@ static const struct panel_desc tianma_tm070jvhg33 = { }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, .connector_type = DRM_MODE_CONNECTOR_LVDS, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; static const struct display_timing tianma_tm070rvhg71_timing = { diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 927e5f42e97d..3e48cbb522a1 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -813,7 +813,7 @@ int ni_init_microcode(struct radeon_device *rdev) err = 0; } else if (rdev->smc_fw->size != smc_req_size) { pr_err("ni_mc: Bogus length %zu in firmware \"%s\"\n", - rdev->mc_fw->size, fw_name); + rdev->smc_fw->size, fw_name); err = -EINVAL; } } diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index 1cd82e1f2d8a..424a000a1ea9 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -463,7 +463,7 @@ static int inno_hdmi_config_video_timing(struct inno_hdmi *hdmi, hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HBLANK_L, value & 0xFF); hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HBLANK_H, (value >> 8) & 0xFF); - value = mode->hsync_start - mode->hdisplay; + value = mode->htotal - mode->hsync_start; hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HDELAY_L, value & 0xFF); hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HDELAY_H, (value >> 8) & 0xFF); @@ -478,7 +478,7 @@ static int inno_hdmi_config_video_timing(struct inno_hdmi *hdmi, value = mode->vtotal - mode->vdisplay; hdmi_writeb(hdmi, HDMI_VIDEO_EXT_VBLANK, value & 0xFF); - value = mode->vsync_start - mode->vdisplay; + value = mode->vtotal - mode->vsync_start; hdmi_writeb(hdmi, HDMI_VIDEO_EXT_VDELAY, value & 0xFF); value = mode->vsync_end - mode->vsync_start; diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c index d773ef485418..b563988fb684 100644 --- a/drivers/gpu/drm/tegra/dpaux.c +++ b/drivers/gpu/drm/tegra/dpaux.c @@ -524,7 +524,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev) if (err < 0) { dev_err(dpaux->dev, "failed to request IRQ#%u: %d\n", dpaux->irq, err); - return err; + goto err_pm_disable; } disable_irq(dpaux->irq); @@ -544,7 +544,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev) */ err = tegra_dpaux_pad_config(dpaux, DPAUX_PADCTL_FUNC_I2C); if (err < 0) - return err; + goto err_pm_disable; #ifdef CONFIG_GENERIC_PINCONF dpaux->desc.name = dev_name(&pdev->dev); @@ -557,7 +557,8 @@ static int tegra_dpaux_probe(struct platform_device *pdev) dpaux->pinctrl = devm_pinctrl_register(&pdev->dev, &dpaux->desc, dpaux); if (IS_ERR(dpaux->pinctrl)) { dev_err(&pdev->dev, "failed to register pincontrol\n"); - return PTR_ERR(dpaux->pinctrl); + err = PTR_ERR(dpaux->pinctrl); + goto err_pm_disable; } #endif /* enable and clear all interrupts */ @@ -573,10 +574,15 @@ static int tegra_dpaux_probe(struct platform_device *pdev) err = devm_of_dp_aux_populate_ep_devices(&dpaux->aux); if (err < 0) { dev_err(dpaux->dev, "failed to populate AUX bus: %d\n", err); - return err; + goto err_pm_disable; } return 0; + +err_pm_disable: + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return err; } static int tegra_dpaux_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 5fc55b9777cb..6806779f8ecc 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1252,9 +1252,26 @@ static int host1x_drm_probe(struct host1x_device *dev) drm_mode_config_reset(drm); - err = drm_aperture_remove_framebuffers(&tegra_drm_driver); - if (err < 0) - goto hub; + /* + * Only take over from a potential firmware framebuffer if any CRTCs + * have been registered. This must not be a fatal error because there + * are other accelerators that are exposed via this driver. + * + * Another case where this happens is on Tegra234 where the display + * hardware is no longer part of the host1x complex, so this driver + * will not expose any modesetting features. + */ + if (drm->mode_config.num_crtc > 0) { + err = drm_aperture_remove_framebuffers(&tegra_drm_driver); + if (err < 0) + goto hub; + } else { + /* + * Indicate to userspace that this doesn't expose any display + * capabilities. + */ + drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); + } err = tegra_drm_fb_init(drm); if (err < 0) diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index de1333dc0d86..7bb26655cb3c 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1534,9 +1534,11 @@ static int tegra_dsi_ganged_probe(struct tegra_dsi *dsi) np = of_parse_phandle(dsi->dev->of_node, "nvidia,ganged-mode", 0); if (np) { struct platform_device *gangster = of_find_device_by_node(np); + of_node_put(np); + if (!gangster) + return -EPROBE_DEFER; dsi->slave = platform_get_drvdata(gangster); - of_node_put(np); if (!dsi->slave) { put_device(&gangster->dev); @@ -1584,48 +1586,58 @@ static int tegra_dsi_probe(struct platform_device *pdev) if (!pdev->dev.pm_domain) { dsi->rst = devm_reset_control_get(&pdev->dev, "dsi"); - if (IS_ERR(dsi->rst)) - return PTR_ERR(dsi->rst); + if (IS_ERR(dsi->rst)) { + err = PTR_ERR(dsi->rst); + goto remove; + } } dsi->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(dsi->clk)) { - dev_err(&pdev->dev, "cannot get DSI clock\n"); - return PTR_ERR(dsi->clk); + err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk), + "cannot get DSI clock\n"); + goto remove; } dsi->clk_lp = devm_clk_get(&pdev->dev, "lp"); if (IS_ERR(dsi->clk_lp)) { - dev_err(&pdev->dev, "cannot get low-power clock\n"); - return PTR_ERR(dsi->clk_lp); + err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_lp), + "cannot get low-power clock\n"); + goto remove; } dsi->clk_parent = devm_clk_get(&pdev->dev, "parent"); if (IS_ERR(dsi->clk_parent)) { - dev_err(&pdev->dev, "cannot get parent clock\n"); - return PTR_ERR(dsi->clk_parent); + err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_parent), + "cannot get parent clock\n"); + goto remove; } dsi->vdd = devm_regulator_get(&pdev->dev, "avdd-dsi-csi"); if (IS_ERR(dsi->vdd)) { - dev_err(&pdev->dev, "cannot get VDD supply\n"); - return PTR_ERR(dsi->vdd); + err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->vdd), + "cannot get VDD supply\n"); + goto remove; } err = tegra_dsi_setup_clocks(dsi); if (err < 0) { dev_err(&pdev->dev, "cannot setup clocks\n"); - return err; + goto remove; } regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); dsi->regs = devm_ioremap_resource(&pdev->dev, regs); - if (IS_ERR(dsi->regs)) - return PTR_ERR(dsi->regs); + if (IS_ERR(dsi->regs)) { + err = PTR_ERR(dsi->regs); + goto remove; + } dsi->mipi = tegra_mipi_request(&pdev->dev, pdev->dev.of_node); - if (IS_ERR(dsi->mipi)) - return PTR_ERR(dsi->mipi); + if (IS_ERR(dsi->mipi)) { + err = PTR_ERR(dsi->mipi); + goto remove; + } dsi->host.ops = &tegra_dsi_host_ops; dsi->host.dev = &pdev->dev; @@ -1653,9 +1665,12 @@ static int tegra_dsi_probe(struct platform_device *pdev) return 0; unregister: + pm_runtime_disable(&pdev->dev); mipi_dsi_host_unregister(&dsi->host); mipi_free: tegra_mipi_free(dsi->mipi); +remove: + tegra_output_remove(&dsi->output); return err; } diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index 9291209154a7..a688ecf08451 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -166,6 +166,7 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, if (gem->size < size) { err = -EINVAL; + drm_gem_object_put(gem); goto unreference; } diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index bf240767dad9..c66764c0bd25 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -1776,7 +1776,6 @@ static irqreturn_t tegra_hdmi_irq(int irq, void *data) static int tegra_hdmi_probe(struct platform_device *pdev) { struct tegra_hdmi *hdmi; - struct resource *regs; int err; hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL); @@ -1838,14 +1837,15 @@ static int tegra_hdmi_probe(struct platform_device *pdev) if (err < 0) return err; - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - hdmi->regs = devm_ioremap_resource(&pdev->dev, regs); - if (IS_ERR(hdmi->regs)) - return PTR_ERR(hdmi->regs); + hdmi->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(hdmi->regs)) { + err = PTR_ERR(hdmi->regs); + goto remove; + } err = platform_get_irq(pdev, 0); if (err < 0) - return err; + goto remove; hdmi->irq = err; @@ -1854,18 +1854,18 @@ static int tegra_hdmi_probe(struct platform_device *pdev) if (err < 0) { dev_err(&pdev->dev, "failed to request IRQ#%u: %d\n", hdmi->irq, err); - return err; + goto remove; } platform_set_drvdata(pdev, hdmi); err = devm_pm_runtime_enable(&pdev->dev); if (err) - return err; + goto remove; err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); if (err) - return err; + goto remove; INIT_LIST_HEAD(&hdmi->client.list); hdmi->client.ops = &hdmi_client_ops; @@ -1875,10 +1875,14 @@ static int tegra_hdmi_probe(struct platform_device *pdev) if (err < 0) { dev_err(&pdev->dev, "failed to register host1x client: %d\n", err); - return err; + goto remove; } return 0; + +remove: + tegra_output_remove(&hdmi->output); + return err; } static int tegra_hdmi_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c index 47d26b5d9945..7ccd010a821b 100644 --- a/drivers/gpu/drm/tegra/output.c +++ b/drivers/gpu/drm/tegra/output.c @@ -139,8 +139,10 @@ int tegra_output_probe(struct tegra_output *output) GPIOD_IN, "HDMI hotplug detect"); if (IS_ERR(output->hpd_gpio)) { - if (PTR_ERR(output->hpd_gpio) != -ENOENT) - return PTR_ERR(output->hpd_gpio); + if (PTR_ERR(output->hpd_gpio) != -ENOENT) { + err = PTR_ERR(output->hpd_gpio); + goto put_i2c; + } output->hpd_gpio = NULL; } @@ -149,7 +151,7 @@ int tegra_output_probe(struct tegra_output *output) err = gpiod_to_irq(output->hpd_gpio); if (err < 0) { dev_err(output->dev, "gpiod_to_irq(): %d\n", err); - return err; + goto put_i2c; } output->hpd_irq = err; @@ -162,7 +164,7 @@ int tegra_output_probe(struct tegra_output *output) if (err < 0) { dev_err(output->dev, "failed to request IRQ#%u: %d\n", output->hpd_irq, err); - return err; + goto put_i2c; } output->connector.polled = DRM_CONNECTOR_POLL_HPD; @@ -176,6 +178,12 @@ int tegra_output_probe(struct tegra_output *output) } return 0; + +put_i2c: + if (output->ddc) + i2c_put_adapter(output->ddc); + + return err; } void tegra_output_remove(struct tegra_output *output) diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index ff8fce36d2aa..86e55e5d12b3 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -214,26 +214,28 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) rgb->clk = devm_clk_get(dc->dev, NULL); if (IS_ERR(rgb->clk)) { dev_err(dc->dev, "failed to get clock\n"); - return PTR_ERR(rgb->clk); + err = PTR_ERR(rgb->clk); + goto remove; } rgb->clk_parent = devm_clk_get(dc->dev, "parent"); if (IS_ERR(rgb->clk_parent)) { dev_err(dc->dev, "failed to get parent clock\n"); - return PTR_ERR(rgb->clk_parent); + err = PTR_ERR(rgb->clk_parent); + goto remove; } err = clk_set_parent(rgb->clk, rgb->clk_parent); if (err < 0) { dev_err(dc->dev, "failed to set parent clock: %d\n", err); - return err; + goto remove; } rgb->pll_d_out0 = clk_get_sys(NULL, "pll_d_out0"); if (IS_ERR(rgb->pll_d_out0)) { err = PTR_ERR(rgb->pll_d_out0); dev_err(dc->dev, "failed to get pll_d_out0: %d\n", err); - return err; + goto remove; } if (dc->soc->has_pll_d2_out0) { @@ -241,13 +243,19 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) if (IS_ERR(rgb->pll_d2_out0)) { err = PTR_ERR(rgb->pll_d2_out0); dev_err(dc->dev, "failed to get pll_d2_out0: %d\n", err); - return err; + goto put_pll; } } dc->rgb = &rgb->output; return 0; + +put_pll: + clk_put(rgb->pll_d_out0); +remove: + tegra_output_remove(&rgb->output); + return err; } int tegra_dc_rgb_remove(struct tegra_dc *dc) diff --git a/drivers/gpu/drm/tidss/tidss_crtc.c b/drivers/gpu/drm/tidss/tidss_crtc.c index cd3c43a6c806..896a77853ebc 100644 --- a/drivers/gpu/drm/tidss/tidss_crtc.c +++ b/drivers/gpu/drm/tidss/tidss_crtc.c @@ -170,13 +170,13 @@ static void tidss_crtc_atomic_flush(struct drm_crtc *crtc, struct tidss_device *tidss = to_tidss(ddev); unsigned long flags; - dev_dbg(ddev->dev, - "%s: %s enabled %d, needs modeset %d, event %p\n", __func__, - crtc->name, drm_atomic_crtc_needs_modeset(crtc->state), - crtc->state->enable, crtc->state->event); + dev_dbg(ddev->dev, "%s: %s is %sactive, %s modeset, event %p\n", + __func__, crtc->name, crtc->state->active ? "" : "not ", + drm_atomic_crtc_needs_modeset(crtc->state) ? "needs" : "doesn't need", + crtc->state->event); /* There is nothing to do if CRTC is not going to be enabled. */ - if (!crtc->state->enable) + if (!crtc->state->active) return; /* @@ -270,6 +270,16 @@ static void tidss_crtc_atomic_disable(struct drm_crtc *crtc, reinit_completion(&tcrtc->framedone_completion); + /* + * If a layer is left enabled when the videoport is disabled, and the + * vid pipeline that was used for the layer is taken into use on + * another videoport, the DSS will report sync lost issues. Disable all + * the layers here as a work-around. + */ + for (u32 layer = 0; layer < tidss->feat->num_planes; layer++) + dispc_ovr_enable_layer(tidss->dispc, tcrtc->hw_videoport, layer, + false); + dispc_vp_disable(tidss->dispc, tcrtc->hw_videoport); if (!wait_for_completion_timeout(&tcrtc->framedone_completion, diff --git a/drivers/gpu/drm/tidss/tidss_plane.c b/drivers/gpu/drm/tidss/tidss_plane.c index 42d50ec5526d..435b3b66ae63 100644 --- a/drivers/gpu/drm/tidss/tidss_plane.c +++ b/drivers/gpu/drm/tidss/tidss_plane.c @@ -211,7 +211,7 @@ struct tidss_plane *tidss_plane_create(struct tidss_device *tidss, drm_plane_helper_add(&tplane->plane, &tidss_plane_helper_funcs); - drm_plane_create_zpos_property(&tplane->plane, hw_plane_id, 0, + drm_plane_create_zpos_property(&tplane->plane, tidss->num_planes, 0, num_planes - 1); ret = drm_plane_create_color_properties(&tplane->plane, diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 86affe987a1c..393b97b4a991 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -383,7 +383,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, enum ttm_caching caching, pgoff_t start_page, pgoff_t end_page) { - struct page **pages = tt->pages; + struct page **pages = &tt->pages[start_page]; unsigned int order; pgoff_t i, nr; diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index ea2eaf6032ca..f69681891349 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -497,7 +497,7 @@ static int vc4_hdmi_connector_get_modes(struct drm_connector *connector) edid = drm_get_edid(connector, vc4_hdmi->ddc); cec_s_phys_addr_from_edid(vc4_hdmi->cec_adap, edid); if (!edid) - return -ENODEV; + return 0; drm_connector_update_edid_property(connector, edid); ret = drm_add_edid_modes(connector, edid); diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 0035affc3e59..9b2d235168bb 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -93,6 +93,7 @@ static int virtio_gpu_probe(struct virtio_device *vdev) goto err_free; } + dma_set_max_seg_size(dev->dev, dma_max_mapping_size(dev->dev) ?: UINT_MAX); ret = virtio_gpu_init(vdev, dev); if (ret) goto err_free; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index b909a3ce9af3..9d7a1b710f48 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1429,12 +1429,15 @@ static void vmw_debugfs_resource_managers_init(struct vmw_private *vmw) root, "system_ttm"); ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, TTM_PL_VRAM), root, "vram_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR), - root, "gmr_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB), - root, "mob_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM), - root, "system_mob_ttm"); + if (vmw->has_gmr) + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR), + root, "gmr_ttm"); + if (vmw->has_mob) { + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB), + root, "mob_ttm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM), + root, "system_mob_ttm"); + } } static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index bc7f02e4eceb..2f7ac91149fc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -447,7 +447,7 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, vmw_res_type(ctx) == vmw_res_dx_context) { for (i = 0; i < cotable_max; ++i) { res = vmw_context_cotable(ctx, i); - if (IS_ERR(res)) + if (IS_ERR_OR_NULL(res)) continue; ret = vmw_execbuf_res_val_add(sw_context, res, @@ -1259,6 +1259,8 @@ static int vmw_cmd_dx_define_query(struct vmw_private *dev_priv, return -EINVAL; cotable_res = vmw_context_cotable(ctx_node->ctx, SVGA_COTABLE_DXQUERY); + if (IS_ERR_OR_NULL(cotable_res)) + return cotable_res ? PTR_ERR(cotable_res) : -EINVAL; ret = vmw_cotable_notify(cotable_res, cmd->body.queryId); return ret; @@ -2477,6 +2479,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv, return ret; res = vmw_context_cotable(ctx_node->ctx, vmw_view_cotables[view_type]); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; ret = vmw_cotable_notify(res, cmd->defined_id); if (unlikely(ret != 0)) return ret; @@ -2562,8 +2566,8 @@ static int vmw_cmd_dx_so_define(struct vmw_private *dev_priv, so_type = vmw_so_cmd_to_type(header->id); res = vmw_context_cotable(ctx_node->ctx, vmw_so_cotables[so_type]); - if (IS_ERR(res)) - return PTR_ERR(res); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; cmd = container_of(header, typeof(*cmd), header); ret = vmw_cotable_notify(res, cmd->defined_id); @@ -2682,6 +2686,8 @@ static int vmw_cmd_dx_define_shader(struct vmw_private *dev_priv, return -EINVAL; res = vmw_context_cotable(ctx_node->ctx, SVGA_COTABLE_DXSHADER); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; ret = vmw_cotable_notify(res, cmd->body.shaderId); if (ret) return ret; @@ -3003,6 +3009,8 @@ static int vmw_cmd_dx_define_streamoutput(struct vmw_private *dev_priv, } res = vmw_context_cotable(ctx_node->ctx, SVGA_COTABLE_STREAMOUTPUT); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; ret = vmw_cotable_notify(res, cmd->body.soid); if (ret) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index 60e3cc537f36..b9e5c8cd3100 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -65,8 +65,11 @@ static int vmw_gmrid_man_get_node(struct ttm_resource_manager *man, ttm_resource_init(bo, place, *res); id = ida_alloc_max(&gman->gmr_ida, gman->max_gmr_ids - 1, GFP_KERNEL); - if (id < 0) + if (id < 0) { + ttm_resource_fini(man, *res); + kfree(*res); return id; + } spin_lock(&gman->lock); diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index c936d6a51c0c..9c963ad27f9d 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -34,6 +34,8 @@ static int sensor_mask_override = -1; module_param_named(sensor_mask, sensor_mask_override, int, 0444); MODULE_PARM_DESC(sensor_mask, "override the detected sensors mask"); +static bool intr_disable = true; + static int amd_sfh_wait_response_v2(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_sts) { union cmd_response cmd_resp; @@ -54,7 +56,7 @@ static void amd_start_sensor_v2(struct amd_mp2_dev *privdata, struct amd_mp2_sen cmd_base.ul = 0; cmd_base.cmd_v2.cmd_id = ENABLE_SENSOR; - cmd_base.cmd_v2.intr_disable = 1; + cmd_base.cmd_v2.intr_disable = intr_disable; cmd_base.cmd_v2.period = info.period; cmd_base.cmd_v2.sensor_id = info.sensor_idx; cmd_base.cmd_v2.length = 16; @@ -72,7 +74,7 @@ static void amd_stop_sensor_v2(struct amd_mp2_dev *privdata, u16 sensor_idx) cmd_base.ul = 0; cmd_base.cmd_v2.cmd_id = DISABLE_SENSOR; - cmd_base.cmd_v2.intr_disable = 1; + cmd_base.cmd_v2.intr_disable = intr_disable; cmd_base.cmd_v2.period = 0; cmd_base.cmd_v2.sensor_id = sensor_idx; cmd_base.cmd_v2.length = 16; @@ -86,7 +88,7 @@ static void amd_stop_all_sensor_v2(struct amd_mp2_dev *privdata) union sfh_cmd_base cmd_base; cmd_base.cmd_v2.cmd_id = STOP_ALL_SENSORS; - cmd_base.cmd_v2.intr_disable = 1; + cmd_base.cmd_v2.intr_disable = intr_disable; cmd_base.cmd_v2.period = 0; cmd_base.cmd_v2.sensor_id = 0; @@ -288,6 +290,26 @@ int amd_sfh_irq_init(struct amd_mp2_dev *privdata) return 0; } +static int mp2_disable_intr(const struct dmi_system_id *id) +{ + intr_disable = false; + return 0; +} + +static const struct dmi_system_id dmi_sfh_table[] = { + { + /* + * https://bugzilla.kernel.org/show_bug.cgi?id=218104 + */ + .callback = mp2_disable_intr, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook x360 435 G7"), + }, + }, + {} +}; + static const struct dmi_system_id dmi_nodevs[] = { { /* @@ -311,6 +333,8 @@ static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i if (dmi_first_match(dmi_nodevs)) return -ENODEV; + dmi_check_system(dmi_sfh_table); + privdata = devm_kzalloc(&pdev->dev, sizeof(*privdata), GFP_KERNEL); if (!privdata) return -ENOMEM; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h index dfb7cabd82ef..2b125cd9742c 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h @@ -89,10 +89,10 @@ enum mem_use_type { struct hpd_status { union { struct { - u32 human_presence_report : 4; - u32 human_presence_actual : 4; - u32 probablity : 8; u32 object_distance : 16; + u32 probablity : 8; + u32 human_presence_actual : 4; + u32 human_presence_report : 4; } shpd; u32 val; }; diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 149a3c74346b..f86c1ea83a03 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -54,10 +54,10 @@ struct lenovo_drvdata { /* 0: Up * 1: Down (undecided) * 2: Scrolling - * 3: Patched firmware, disable workaround */ u8 middlebutton_state; bool fn_lock; + bool middleclick_workaround_cptkbd; }; #define map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, EV_KEY, (c)) @@ -621,6 +621,36 @@ static ssize_t attr_sensitivity_store_cptkbd(struct device *dev, return count; } +static ssize_t attr_middleclick_workaround_show_cptkbd(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hid_device *hdev = to_hid_device(dev); + struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); + + return snprintf(buf, PAGE_SIZE, "%u\n", + cptkbd_data->middleclick_workaround_cptkbd); +} + +static ssize_t attr_middleclick_workaround_store_cptkbd(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct hid_device *hdev = to_hid_device(dev); + struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); + int value; + + if (kstrtoint(buf, 10, &value)) + return -EINVAL; + if (value < 0 || value > 1) + return -EINVAL; + + cptkbd_data->middleclick_workaround_cptkbd = !!value; + + return count; +} + static struct device_attribute dev_attr_fn_lock = __ATTR(fn_lock, S_IWUSR | S_IRUGO, @@ -632,10 +662,16 @@ static struct device_attribute dev_attr_sensitivity_cptkbd = attr_sensitivity_show_cptkbd, attr_sensitivity_store_cptkbd); +static struct device_attribute dev_attr_middleclick_workaround_cptkbd = + __ATTR(middleclick_workaround, S_IWUSR | S_IRUGO, + attr_middleclick_workaround_show_cptkbd, + attr_middleclick_workaround_store_cptkbd); + static struct attribute *lenovo_attributes_cptkbd[] = { &dev_attr_fn_lock.attr, &dev_attr_sensitivity_cptkbd.attr, + &dev_attr_middleclick_workaround_cptkbd.attr, NULL }; @@ -686,23 +722,7 @@ static int lenovo_event_cptkbd(struct hid_device *hdev, { struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); - if (cptkbd_data->middlebutton_state != 3) { - /* REL_X and REL_Y events during middle button pressed - * are only possible on patched, bug-free firmware - * so set middlebutton_state to 3 - * to never apply workaround anymore - */ - if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD && - cptkbd_data->middlebutton_state == 1 && - usage->type == EV_REL && - (usage->code == REL_X || usage->code == REL_Y)) { - cptkbd_data->middlebutton_state = 3; - /* send middle button press which was hold before */ - input_event(field->hidinput->input, - EV_KEY, BTN_MIDDLE, 1); - input_sync(field->hidinput->input); - } - + if (cptkbd_data->middleclick_workaround_cptkbd) { /* "wheel" scroll events */ if (usage->type == EV_REL && (usage->code == REL_WHEEL || usage->code == REL_HWHEEL)) { @@ -1166,6 +1186,7 @@ static int lenovo_probe_cptkbd(struct hid_device *hdev) cptkbd_data->middlebutton_state = 0; cptkbd_data->fn_lock = true; cptkbd_data->sensitivity = 0x05; + cptkbd_data->middleclick_workaround_cptkbd = true; lenovo_features_set_cptkbd(hdev); ret = sysfs_create_group(&hdev->dev.kobj, &lenovo_attr_group_cptkbd); diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 5ec1f174127a..3816fd06bc95 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2153,6 +2153,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xcd7e) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_SYNAPTICS, 0xcddc) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce08) }, diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index b617aada50b0..7b097ab2170c 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -355,8 +355,11 @@ static int hidraw_release(struct inode * inode, struct file * file) down_write(&minors_rwsem); spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags); - for (int i = list->tail; i < list->head; i++) - kfree(list->buffer[i].value); + while (list->tail != list->head) { + kfree(list->buffer[list->tail].value); + list->buffer[list->tail].value = NULL; + list->tail = (list->tail + 1) & (HIDRAW_BUFFER_SIZE - 1); + } list_del(&list->node); spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags); kfree(list); diff --git a/drivers/hid/i2c-hid/i2c-hid-of.c b/drivers/hid/i2c-hid/i2c-hid-of.c index 97a27a803f58..6feb812fce37 100644 --- a/drivers/hid/i2c-hid/i2c-hid-of.c +++ b/drivers/hid/i2c-hid/i2c-hid-of.c @@ -80,6 +80,7 @@ static int i2c_hid_of_probe(struct i2c_client *client, if (!ihid_of) return -ENOMEM; + ihid_of->client = client; ihid_of->ops.power_up = i2c_hid_of_power_up; ihid_of->ops.power_down = i2c_hid_of_power_down; diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index af163e8dfec0..12d4c28741d7 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2080,7 +2080,7 @@ static int wacom_allocate_inputs(struct wacom *wacom) return 0; } -static int wacom_register_inputs(struct wacom *wacom) +static int wacom_setup_inputs(struct wacom *wacom) { struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev; struct wacom_wac *wacom_wac = &(wacom->wacom_wac); @@ -2099,10 +2099,6 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(pen_input_dev); wacom_wac->pen_input = NULL; pen_input_dev = NULL; - } else { - error = input_register_device(pen_input_dev); - if (error) - goto fail; } error = wacom_setup_touch_input_capabilities(touch_input_dev, wacom_wac); @@ -2111,10 +2107,6 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(touch_input_dev); wacom_wac->touch_input = NULL; touch_input_dev = NULL; - } else { - error = input_register_device(touch_input_dev); - if (error) - goto fail; } error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac); @@ -2123,7 +2115,34 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(pad_input_dev); wacom_wac->pad_input = NULL; pad_input_dev = NULL; - } else { + } + + return 0; +} + +static int wacom_register_inputs(struct wacom *wacom) +{ + struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev; + struct wacom_wac *wacom_wac = &(wacom->wacom_wac); + int error = 0; + + pen_input_dev = wacom_wac->pen_input; + touch_input_dev = wacom_wac->touch_input; + pad_input_dev = wacom_wac->pad_input; + + if (pen_input_dev) { + error = input_register_device(pen_input_dev); + if (error) + goto fail; + } + + if (touch_input_dev) { + error = input_register_device(touch_input_dev); + if (error) + goto fail; + } + + if (pad_input_dev) { error = input_register_device(pad_input_dev); if (error) goto fail; @@ -2379,6 +2398,20 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) goto fail; } + error = wacom_setup_inputs(wacom); + if (error) + goto fail; + + if (features->type == HID_GENERIC) + connect_mask |= HID_CONNECT_DRIVER; + + /* Regular HID work starts now */ + error = hid_hw_start(hdev, connect_mask); + if (error) { + hid_err(hdev, "hw start failed\n"); + goto fail; + } + error = wacom_register_inputs(wacom); if (error) goto fail; @@ -2393,16 +2426,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) goto fail; } - if (features->type == HID_GENERIC) - connect_mask |= HID_CONNECT_DRIVER; - - /* Regular HID work starts now */ - error = hid_hw_start(hdev, connect_mask); - if (error) { - hid_err(hdev, "hw start failed\n"); - goto fail; - } - if (!wireless) { /* Note that if query fails it is not a hard failure */ wacom_query_tablet_data(wacom); diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 165ed872fa4e..53235b276bb2 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2571,7 +2571,14 @@ static void wacom_wac_pen_report(struct hid_device *hdev, wacom_wac->hid_data.tipswitch); input_report_key(input, wacom_wac->tool[0], sense); if (wacom_wac->serial[0]) { - input_event(input, EV_MSC, MSC_SERIAL, wacom_wac->serial[0]); + /* + * xf86-input-wacom does not accept a serial number + * of '0'. Report the low 32 bits if possible, but + * if they are zero, report the upper ones instead. + */ + __u32 serial_lo = wacom_wac->serial[0] & 0xFFFFFFFFu; + __u32 serial_hi = wacom_wac->serial[0] >> 32; + input_event(input, EV_MSC, MSC_SERIAL, (int)(serial_lo ? serial_lo : serial_hi)); input_report_abs(input, ABS_MISC, sense ? id : 0); } diff --git a/drivers/hwmon/amc6821.c b/drivers/hwmon/amc6821.c index 3bfd12ff4b3c..6868db4ac84f 100644 --- a/drivers/hwmon/amc6821.c +++ b/drivers/hwmon/amc6821.c @@ -934,10 +934,21 @@ static const struct i2c_device_id amc6821_id[] = { MODULE_DEVICE_TABLE(i2c, amc6821_id); +static const struct of_device_id __maybe_unused amc6821_of_match[] = { + { + .compatible = "ti,amc6821", + .data = (void *)amc6821, + }, + { } +}; + +MODULE_DEVICE_TABLE(of, amc6821_of_match); + static struct i2c_driver amc6821_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "amc6821", + .of_match_table = of_match_ptr(amc6821_of_match), }, .probe_new = amc6821_probe, .id_table = amc6821_id, diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c index d11f674e3dc3..51f321bcd778 100644 --- a/drivers/hwmon/aspeed-pwm-tacho.c +++ b/drivers/hwmon/aspeed-pwm-tacho.c @@ -194,6 +194,8 @@ struct aspeed_pwm_tacho_data { u8 fan_tach_ch_source[16]; struct aspeed_cooling_device *cdev[8]; const struct attribute_group *groups[3]; + /* protects access to shared ASPEED_PTCR_RESULT */ + struct mutex tach_lock; }; enum type { TYPEM, TYPEN, TYPEO }; @@ -528,6 +530,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, u8 fan_tach_ch_source, type, mode, both; int ret; + mutex_lock(&priv->tach_lock); + regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0); regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0x1 << fan_tach_ch); @@ -545,6 +549,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, ASPEED_RPM_STATUS_SLEEP_USEC, usec); + mutex_unlock(&priv->tach_lock); + /* return -ETIMEDOUT if we didn't get an answer. */ if (ret) return ret; @@ -904,6 +910,7 @@ static int aspeed_pwm_tacho_probe(struct platform_device *pdev) priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + mutex_init(&priv->tach_lock); priv->regmap = devm_regmap_init(dev, NULL, (__force void *)regs, &aspeed_pwm_tacho_regmap_config); if (IS_ERR(priv->regmap)) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 09aab5859fa7..c0aa6bfa66b2 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -40,7 +40,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius"); #define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */ #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */ -#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */ +#define NUM_REAL_CORES 512 /* Number of Real cores per cpu */ #define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */ #define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */ #define TOTAL_ATTRS (MAX_CORE_ATTRS + 1) @@ -380,7 +380,7 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) } static int create_core_attrs(struct temp_data *tdata, struct device *dev, - int attr_no) + int index) { int i; static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev, @@ -392,13 +392,20 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev, }; for (i = 0; i < tdata->attr_size; i++) { + /* + * We map the attr number to core id of the CPU + * The attr number is always core id + 2 + * The Pkgtemp will always show up as temp1_*, if available + */ + int attr_no = tdata->is_pkg_data ? 1 : tdata->cpu_core_id + 2; + snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH, "temp%d_%s", attr_no, suffixes[i]); sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr); tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i]; tdata->sd_attrs[i].dev_attr.attr.mode = 0444; tdata->sd_attrs[i].dev_attr.show = rd_ptr[i]; - tdata->sd_attrs[i].index = attr_no; + tdata->sd_attrs[i].index = index; tdata->attrs[i] = &tdata->sd_attrs[i].dev_attr.attr; } tdata->attr_group.attrs = tdata->attrs; @@ -456,27 +463,22 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, struct platform_data *pdata = platform_get_drvdata(pdev); struct cpuinfo_x86 *c = &cpu_data(cpu); u32 eax, edx; - int err, index, attr_no; + int err, index; /* - * Find attr number for sysfs: - * We map the attr number to core id of the CPU - * The attr number is always core id + 2 - * The Pkgtemp will always show up as temp1_*, if available + * Get the index of tdata in pdata->core_data[] + * tdata for package: pdata->core_data[1] + * tdata for core: pdata->core_data[2] .. pdata->core_data[NUM_REAL_CORES + 1] */ if (pkg_flag) { - attr_no = PKG_SYSFS_ATTR_NO; + index = PKG_SYSFS_ATTR_NO; } else { - index = ida_alloc(&pdata->ida, GFP_KERNEL); + index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL); if (index < 0) return index; - pdata->cpu_map[index] = topology_core_id(cpu); - attr_no = index + BASE_SYSFS_ATTR_NO; - } - if (attr_no > MAX_CORE_DATA - 1) { - err = -ERANGE; - goto ida_free; + pdata->cpu_map[index] = topology_core_id(cpu); + index += BASE_SYSFS_ATTR_NO; } tdata = init_temp_data(cpu, pkg_flag); @@ -508,20 +510,20 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, } } - pdata->core_data[attr_no] = tdata; + pdata->core_data[index] = tdata; /* Create sysfs interfaces */ - err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no); + err = create_core_attrs(tdata, pdata->hwmon_dev, index); if (err) goto exit_free; return 0; exit_free: - pdata->core_data[attr_no] = NULL; + pdata->core_data[index] = NULL; kfree(tdata); ida_free: if (!pkg_flag) - ida_free(&pdata->ida, index); + ida_free(&pdata->ida, index - BASE_SYSFS_ATTR_NO); return err; } diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c index 80310845fb99..9720ad214c20 100644 --- a/drivers/hwmon/nct6775-core.c +++ b/drivers/hwmon/nct6775-core.c @@ -2462,6 +2462,13 @@ store_pwm(struct device *dev, struct device_attribute *attr, const char *buf, int err; u16 reg; + /* + * The fan control mode should be set to manual if the user wants to adjust + * the fan speed. Otherwise, it will fail to set. + */ + if (index == 0 && data->pwm_enable[nr] > manual) + return -EBUSY; + err = kstrtoul(buf, 10, &val); if (err < 0) return err; diff --git a/drivers/hwtracing/ptt/hisi_ptt.c b/drivers/hwtracing/ptt/hisi_ptt.c index 8d8fa8e8afe0..20a9cddb3723 100644 --- a/drivers/hwtracing/ptt/hisi_ptt.c +++ b/drivers/hwtracing/ptt/hisi_ptt.c @@ -654,6 +654,9 @@ static int hisi_ptt_pmu_event_init(struct perf_event *event) int ret; u32 val; + if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type) + return -ENOENT; + if (event->cpu < 0) { dev_dbg(event->pmu->dev, "Per-task mode not supported\n"); return -EOPNOTSUPP; @@ -662,9 +665,6 @@ static int hisi_ptt_pmu_event_init(struct perf_event *event) if (event->attach_state & PERF_ATTACH_TASK) return -EOPNOTSUPP; - if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type) - return -ENOENT; - ret = hisi_ptt_trace_valid_filter(hisi_ptt, event->attr.config); if (ret < 0) return ret; diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index e73cdb1d2b5a..784a803279d9 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -89,10 +89,8 @@ obj-$(CONFIG_I2C_NPCM) += i2c-npcm7xx.o obj-$(CONFIG_I2C_OCORES) += i2c-ocores.o obj-$(CONFIG_I2C_OMAP) += i2c-omap.o obj-$(CONFIG_I2C_OWL) += i2c-owl.o -i2c-pasemi-objs := i2c-pasemi-core.o i2c-pasemi-pci.o -obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o -i2c-apple-objs := i2c-pasemi-core.o i2c-pasemi-platform.o -obj-$(CONFIG_I2C_APPLE) += i2c-apple.o +obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi-core.o i2c-pasemi-pci.o +obj-$(CONFIG_I2C_APPLE) += i2c-pasemi-core.o i2c-pasemi-platform.o obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o obj-$(CONFIG_I2C_PNX) += i2c-pnx.o obj-$(CONFIG_I2C_PXA) += i2c-pxa.o diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 3159ffbb77a2..1c970842624b 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -500,11 +500,10 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, /* Set block buffer mode */ outb_p(inb_p(SMBAUXCTL(priv)) | SMBAUXCTL_E32B, SMBAUXCTL(priv)); - inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ - if (read_write == I2C_SMBUS_WRITE) { len = data->block[0]; outb_p(len, SMBHSTDAT0(priv)); + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) outb_p(data->block[i+1], SMBBLKDAT(priv)); } @@ -520,6 +519,7 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, return -EPROTO; data->block[0] = len; + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) data->block[i + 1] = inb_p(SMBBLKDAT(priv)); } @@ -1422,7 +1422,6 @@ static void i801_add_mux(struct i801_priv *priv) lookup->table[i] = GPIO_LOOKUP(mux_config->gpio_chip, mux_config->gpios[i], "mux", 0); gpiod_add_lookup_table(lookup); - priv->lookup = lookup; /* * Register the mux device, we use PLATFORM_DEVID_NONE here @@ -1436,7 +1435,10 @@ static void i801_add_mux(struct i801_priv *priv) sizeof(struct i2c_mux_gpio_platform_data)); if (IS_ERR(priv->mux_pdev)) { gpiod_remove_lookup_table(lookup); + devm_kfree(dev, lookup); dev_err(dev, "Failed to register i2c-mux-gpio device\n"); + } else { + priv->lookup = lookup; } } diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index fc70920c4dda..0c203c614197 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -804,6 +804,11 @@ static irqreturn_t i2c_imx_slave_handle(struct imx_i2c_struct *i2c_imx, ctl &= ~I2CR_MTX; imx_i2c_write_reg(ctl, i2c_imx, IMX_I2C_I2CR); imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); + + /* flag the last byte as processed */ + i2c_imx_slave_event(i2c_imx, + I2C_SLAVE_READ_PROCESSED, &value); + i2c_imx_slave_finish_op(i2c_imx); return IRQ_HANDLED; } diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c index 9028ffb58cc0..f297e41352e7 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.c +++ b/drivers/i2c/busses/i2c-pasemi-core.c @@ -356,3 +356,8 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus) return 0; } +EXPORT_SYMBOL_GPL(pasemi_i2c_common_probe); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Olof Johansson "); +MODULE_DESCRIPTION("PA Semi PWRficient SMBus driver"); diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 8fce98bb77ff..75b9c3f26bba 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -605,20 +605,20 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i peripheral.addr = msgs[i].addr; - if (msgs[i].flags & I2C_M_RD) { - ret = geni_i2c_gpi(gi2c, &msgs[i], &config, - &rx_addr, &rx_buf, I2C_READ, gi2c->rx_c); - if (ret) - goto err; - } - ret = geni_i2c_gpi(gi2c, &msgs[i], &config, &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c); if (ret) goto err; - if (msgs[i].flags & I2C_M_RD) + if (msgs[i].flags & I2C_M_RD) { + ret = geni_i2c_gpi(gi2c, &msgs[i], &config, + &rx_addr, &rx_buf, I2C_READ, gi2c->rx_c); + if (ret) + goto err; + dma_async_issue_pending(gi2c->rx_c); + } + dma_async_issue_pending(gi2c->tx_c); timeout = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT); diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index 4a49c75a9408..b9cfda6ae9ae 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -77,7 +77,8 @@ #define PRESCL_CTRL0 0x14 #define PRESCL_CTRL0_I2C(x) ((x) << 16) #define PRESCL_CTRL0_I3C(x) (x) -#define PRESCL_CTRL0_MAX GENMASK(9, 0) +#define PRESCL_CTRL0_I3C_MAX GENMASK(9, 0) +#define PRESCL_CTRL0_I2C_MAX GENMASK(15, 0) #define PRESCL_CTRL1 0x18 #define PRESCL_CTRL1_PP_LOW_MASK GENMASK(15, 8) @@ -1234,7 +1235,7 @@ static int cdns_i3c_master_bus_init(struct i3c_master_controller *m) return -EINVAL; pres = DIV_ROUND_UP(sysclk_rate, (bus->scl_rate.i3c * 4)) - 1; - if (pres > PRESCL_CTRL0_MAX) + if (pres > PRESCL_CTRL0_I3C_MAX) return -ERANGE; bus->scl_rate.i3c = sysclk_rate / ((pres + 1) * 4); @@ -1247,7 +1248,7 @@ static int cdns_i3c_master_bus_init(struct i3c_master_controller *m) max_i2cfreq = bus->scl_rate.i2c; pres = (sysclk_rate / (max_i2cfreq * 5)) - 1; - if (pres > PRESCL_CTRL0_MAX) + if (pres > PRESCL_CTRL0_I2C_MAX) return -ERANGE; bus->scl_rate.i2c = sysclk_rate / ((pres + 1) * 5); diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index ffac66db7ac9..1f34747a68bf 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -219,10 +219,12 @@ config BMA400 config BMA400_I2C tristate + select REGMAP_I2C depends on BMA400 config BMA400_SPI tristate + select REGMAP_SPI depends on BMA400 config BMC150_ACCEL diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index 7c7d78040793..f1a41b92543a 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -1444,9 +1444,11 @@ static int adxl367_verify_devid(struct adxl367_state *st) unsigned int val; int ret; - ret = regmap_read_poll_timeout(st->regmap, ADXL367_REG_DEVID, val, - val == ADXL367_DEVID_AD, 1000, 10000); + ret = regmap_read(st->regmap, ADXL367_REG_DEVID, &val); if (ret) + return dev_err_probe(st->dev, ret, "Failed to read dev id\n"); + + if (val != ADXL367_DEVID_AD) return dev_err_probe(st->dev, -ENODEV, "Invalid dev id 0x%02X, expected 0x%02X\n", val, ADXL367_DEVID_AD); @@ -1543,6 +1545,8 @@ int adxl367_probe(struct device *dev, const struct adxl367_ops *ops, if (ret) return ret; + fsleep(15000); + ret = adxl367_verify_devid(st); if (ret) return ret; diff --git a/drivers/iio/accel/adxl367_i2c.c b/drivers/iio/accel/adxl367_i2c.c index 3606efa25835..5c040915276d 100644 --- a/drivers/iio/accel/adxl367_i2c.c +++ b/drivers/iio/accel/adxl367_i2c.c @@ -11,7 +11,7 @@ #include "adxl367.h" -#define ADXL367_I2C_FIFO_DATA 0x42 +#define ADXL367_I2C_FIFO_DATA 0x18 struct adxl367_i2c_state { struct regmap *regmap; diff --git a/drivers/iio/adc/ad7091r-base.c b/drivers/iio/adc/ad7091r-base.c index 0e5d3d2e9c98..76002b91c86a 100644 --- a/drivers/iio/adc/ad7091r-base.c +++ b/drivers/iio/adc/ad7091r-base.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -28,6 +29,7 @@ #define AD7091R_REG_RESULT_CONV_RESULT(x) ((x) & 0xfff) /* AD7091R_REG_CONF */ +#define AD7091R_REG_CONF_ALERT_EN BIT(4) #define AD7091R_REG_CONF_AUTO BIT(8) #define AD7091R_REG_CONF_CMD BIT(10) @@ -49,6 +51,27 @@ struct ad7091r_state { struct mutex lock; /*lock to prevent concurent reads */ }; +const struct iio_event_spec ad7091r_events[] = { + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_RISING, + .mask_separate = BIT(IIO_EV_INFO_VALUE) | + BIT(IIO_EV_INFO_ENABLE), + }, + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_FALLING, + .mask_separate = BIT(IIO_EV_INFO_VALUE) | + BIT(IIO_EV_INFO_ENABLE), + }, + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_EITHER, + .mask_separate = BIT(IIO_EV_INFO_HYSTERESIS), + }, +}; +EXPORT_SYMBOL_NS_GPL(ad7091r_events, IIO_AD7091R); + static int ad7091r_set_mode(struct ad7091r_state *st, enum ad7091r_mode mode) { int ret, conf; @@ -168,8 +191,142 @@ unlock: return ret; } +static int ad7091r_read_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir) +{ + struct ad7091r_state *st = iio_priv(indio_dev); + int val, ret; + + switch (dir) { + case IIO_EV_DIR_RISING: + ret = regmap_read(st->map, + AD7091R_REG_CH_HIGH_LIMIT(chan->channel), + &val); + if (ret) + return ret; + return val != AD7091R_HIGH_LIMIT; + case IIO_EV_DIR_FALLING: + ret = regmap_read(st->map, + AD7091R_REG_CH_LOW_LIMIT(chan->channel), + &val); + if (ret) + return ret; + return val != AD7091R_LOW_LIMIT; + default: + return -EINVAL; + } +} + +static int ad7091r_write_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, int state) +{ + struct ad7091r_state *st = iio_priv(indio_dev); + + if (state) { + return regmap_set_bits(st->map, AD7091R_REG_CONF, + AD7091R_REG_CONF_ALERT_EN); + } else { + /* + * Set thresholds either to 0 or to 2^12 - 1 as appropriate to + * prevent alerts and thus disable event generation. + */ + switch (dir) { + case IIO_EV_DIR_RISING: + return regmap_write(st->map, + AD7091R_REG_CH_HIGH_LIMIT(chan->channel), + AD7091R_HIGH_LIMIT); + case IIO_EV_DIR_FALLING: + return regmap_write(st->map, + AD7091R_REG_CH_LOW_LIMIT(chan->channel), + AD7091R_LOW_LIMIT); + default: + return -EINVAL; + } + } +} + +static int ad7091r_read_event_value(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, int *val, int *val2) +{ + struct ad7091r_state *st = iio_priv(indio_dev); + int ret; + + switch (info) { + case IIO_EV_INFO_VALUE: + switch (dir) { + case IIO_EV_DIR_RISING: + ret = regmap_read(st->map, + AD7091R_REG_CH_HIGH_LIMIT(chan->channel), + val); + if (ret) + return ret; + return IIO_VAL_INT; + case IIO_EV_DIR_FALLING: + ret = regmap_read(st->map, + AD7091R_REG_CH_LOW_LIMIT(chan->channel), + val); + if (ret) + return ret; + return IIO_VAL_INT; + default: + return -EINVAL; + } + case IIO_EV_INFO_HYSTERESIS: + ret = regmap_read(st->map, + AD7091R_REG_CH_HYSTERESIS(chan->channel), + val); + if (ret) + return ret; + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int ad7091r_write_event_value(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, int val, int val2) +{ + struct ad7091r_state *st = iio_priv(indio_dev); + + switch (info) { + case IIO_EV_INFO_VALUE: + switch (dir) { + case IIO_EV_DIR_RISING: + return regmap_write(st->map, + AD7091R_REG_CH_HIGH_LIMIT(chan->channel), + val); + case IIO_EV_DIR_FALLING: + return regmap_write(st->map, + AD7091R_REG_CH_LOW_LIMIT(chan->channel), + val); + default: + return -EINVAL; + } + case IIO_EV_INFO_HYSTERESIS: + return regmap_write(st->map, + AD7091R_REG_CH_HYSTERESIS(chan->channel), + val); + default: + return -EINVAL; + } +} + static const struct iio_info ad7091r_info = { .read_raw = ad7091r_read_raw, + .read_event_config = &ad7091r_read_event_config, + .write_event_config = &ad7091r_write_event_config, + .read_event_value = &ad7091r_read_event_value, + .write_event_value = &ad7091r_write_event_value, }; static irqreturn_t ad7091r_event_handler(int irq, void *private) @@ -232,6 +389,11 @@ int ad7091r_probe(struct device *dev, const char *name, iio_dev->channels = chip_info->channels; if (irq) { + ret = regmap_update_bits(st->map, AD7091R_REG_CONF, + AD7091R_REG_CONF_ALERT_EN, BIT(4)); + if (ret) + return ret; + ret = devm_request_threaded_irq(dev, irq, NULL, ad7091r_event_handler, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, name, iio_dev); @@ -243,7 +405,14 @@ int ad7091r_probe(struct device *dev, const char *name, if (IS_ERR(st->vref)) { if (PTR_ERR(st->vref) == -EPROBE_DEFER) return -EPROBE_DEFER; + st->vref = NULL; + /* Enable internal vref */ + ret = regmap_set_bits(st->map, AD7091R_REG_CONF, + AD7091R_REG_CONF_INT_VREF); + if (ret) + return dev_err_probe(st->dev, ret, + "Error on enable internal reference\n"); } else { ret = regulator_enable(st->vref); if (ret) diff --git a/drivers/iio/adc/ad7091r-base.h b/drivers/iio/adc/ad7091r-base.h index 509748aef9b1..b9e1c8bf3440 100644 --- a/drivers/iio/adc/ad7091r-base.h +++ b/drivers/iio/adc/ad7091r-base.h @@ -8,6 +8,12 @@ #ifndef __DRIVERS_IIO_ADC_AD7091R_BASE_H__ #define __DRIVERS_IIO_ADC_AD7091R_BASE_H__ +#define AD7091R_REG_CONF_INT_VREF BIT(0) + +/* AD7091R_REG_CH_LIMIT */ +#define AD7091R_HIGH_LIMIT 0xFFF +#define AD7091R_LOW_LIMIT 0x0 + struct device; struct ad7091r_state; @@ -17,6 +23,8 @@ struct ad7091r_chip_info { unsigned int vref_mV; }; +extern const struct iio_event_spec ad7091r_events[3]; + extern const struct regmap_config ad7091r_regmap_config; int ad7091r_probe(struct device *dev, const char *name, diff --git a/drivers/iio/adc/ad7091r5.c b/drivers/iio/adc/ad7091r5.c index 47f5763023a4..12d475463945 100644 --- a/drivers/iio/adc/ad7091r5.c +++ b/drivers/iio/adc/ad7091r5.c @@ -12,26 +12,6 @@ #include "ad7091r-base.h" -static const struct iio_event_spec ad7091r5_events[] = { - { - .type = IIO_EV_TYPE_THRESH, - .dir = IIO_EV_DIR_RISING, - .mask_separate = BIT(IIO_EV_INFO_VALUE) | - BIT(IIO_EV_INFO_ENABLE), - }, - { - .type = IIO_EV_TYPE_THRESH, - .dir = IIO_EV_DIR_FALLING, - .mask_separate = BIT(IIO_EV_INFO_VALUE) | - BIT(IIO_EV_INFO_ENABLE), - }, - { - .type = IIO_EV_TYPE_THRESH, - .dir = IIO_EV_DIR_EITHER, - .mask_separate = BIT(IIO_EV_INFO_HYSTERESIS), - }, -}; - #define AD7091R_CHANNEL(idx, bits, ev, num_ev) { \ .type = IIO_VOLTAGE, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ @@ -44,10 +24,10 @@ static const struct iio_event_spec ad7091r5_events[] = { .scan_type.realbits = bits, \ } static const struct iio_chan_spec ad7091r5_channels_irq[] = { - AD7091R_CHANNEL(0, 12, ad7091r5_events, ARRAY_SIZE(ad7091r5_events)), - AD7091R_CHANNEL(1, 12, ad7091r5_events, ARRAY_SIZE(ad7091r5_events)), - AD7091R_CHANNEL(2, 12, ad7091r5_events, ARRAY_SIZE(ad7091r5_events)), - AD7091R_CHANNEL(3, 12, ad7091r5_events, ARRAY_SIZE(ad7091r5_events)), + AD7091R_CHANNEL(0, 12, ad7091r_events, ARRAY_SIZE(ad7091r_events)), + AD7091R_CHANNEL(1, 12, ad7091r_events, ARRAY_SIZE(ad7091r_events)), + AD7091R_CHANNEL(2, 12, ad7091r_events, ARRAY_SIZE(ad7091r_events)), + AD7091R_CHANNEL(3, 12, ad7091r_events, ARRAY_SIZE(ad7091r_events)), }; static const struct iio_chan_spec ad7091r5_channels_noirq[] = { diff --git a/drivers/iio/imu/bno055/Kconfig b/drivers/iio/imu/bno055/Kconfig index 83e53acfbe88..c7f5866a177d 100644 --- a/drivers/iio/imu/bno055/Kconfig +++ b/drivers/iio/imu/bno055/Kconfig @@ -8,6 +8,7 @@ config BOSCH_BNO055 config BOSCH_BNO055_SERIAL tristate "Bosch BNO055 attached via UART" depends on SERIAL_DEV_BUS + select REGMAP select BOSCH_BNO055 help Enable this to support Bosch BNO055 IMUs attached via UART. diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 95dfcbc4d0b8..6eee8800a3b5 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1611,10 +1611,13 @@ static int iio_device_register_sysfs(struct iio_dev *indio_dev) ret = iio_device_register_sysfs_group(indio_dev, &iio_dev_opaque->chan_attr_group); if (ret) - goto error_clear_attrs; + goto error_free_chan_attrs; return 0; +error_free_chan_attrs: + kfree(iio_dev_opaque->chan_attr_group.attrs); + iio_dev_opaque->chan_attr_group.attrs = NULL; error_clear_attrs: iio_free_chan_devattr_list(&iio_dev_opaque->channel_attr_list); diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c index 5a1a625d8d16..85097b769c20 100644 --- a/drivers/iio/light/hid-sensor-als.c +++ b/drivers/iio/light/hid-sensor-als.c @@ -228,6 +228,7 @@ static int als_capture_sample(struct hid_sensor_hub_device *hsdev, case HID_USAGE_SENSOR_TIME_TIMESTAMP: als_state->timestamp = hid_sensor_convert_timestamp(&als_state->common_attributes, *(s64 *)raw_data); + ret = 0; break; default: break; diff --git a/drivers/iio/magnetometer/rm3100-core.c b/drivers/iio/magnetometer/rm3100-core.c index 69938204456f..42b70cd42b39 100644 --- a/drivers/iio/magnetometer/rm3100-core.c +++ b/drivers/iio/magnetometer/rm3100-core.c @@ -530,6 +530,7 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq) struct rm3100_data *data; unsigned int tmp; int ret; + int samp_rate_index; indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); if (!indio_dev) @@ -586,9 +587,14 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq) ret = regmap_read(regmap, RM3100_REG_TMRC, &tmp); if (ret < 0) return ret; + + samp_rate_index = tmp - RM3100_TMRC_OFFSET; + if (samp_rate_index < 0 || samp_rate_index >= RM3100_SAMP_NUM) { + dev_err(dev, "The value read from RM3100_REG_TMRC is invalid!\n"); + return -EINVAL; + } /* Initializing max wait time, which is double conversion time. */ - data->conversion_time = rm3100_samp_rates[tmp - RM3100_TMRC_OFFSET][2] - * 2; + data->conversion_time = rm3100_samp_rates[samp_rate_index][2] * 2; /* Cycle count values may not be what we want. */ if ((tmp - RM3100_TMRC_OFFSET) == 0) diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h index e9d282679ef1..944d9071245d 100644 --- a/drivers/infiniband/core/cm_trace.h +++ b/drivers/infiniband/core/cm_trace.h @@ -16,7 +16,7 @@ #include #include -#include +#include /* * enum ib_cm_state, from include/rdma/ib_cm.h diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0773ca7ace24..067d7f42871f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3547,121 +3547,6 @@ err: return ret; } -static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr) -{ - struct sockaddr_storage zero_sock = {}; - - if (src_addr && src_addr->sa_family) - return rdma_bind_addr(id, src_addr); - - /* - * When the src_addr is not specified, automatically supply an any addr - */ - zero_sock.ss_family = dst_addr->sa_family; - if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { - struct sockaddr_in6 *src_addr6 = - (struct sockaddr_in6 *)&zero_sock; - struct sockaddr_in6 *dst_addr6 = - (struct sockaddr_in6 *)dst_addr; - - src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; - if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - id->route.addr.dev_addr.bound_dev_if = - dst_addr6->sin6_scope_id; - } else if (dst_addr->sa_family == AF_IB) { - ((struct sockaddr_ib *)&zero_sock)->sib_pkey = - ((struct sockaddr_ib *)dst_addr)->sib_pkey; - } - return rdma_bind_addr(id, (struct sockaddr *)&zero_sock); -} - -/* - * If required, resolve the source address for bind and leave the id_priv in - * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior - * calls made by ULP, a previously bound ID will not be re-bound and src_addr is - * ignored. - */ -static int resolve_prepare_src(struct rdma_id_private *id_priv, - struct sockaddr *src_addr, - const struct sockaddr *dst_addr) -{ - int ret; - - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { - /* For a well behaved ULP state will be RDMA_CM_IDLE */ - ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); - if (ret) - goto err_dst; - if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, - RDMA_CM_ADDR_QUERY))) { - ret = -EINVAL; - goto err_dst; - } - } - - if (cma_family(id_priv) != dst_addr->sa_family) { - ret = -EINVAL; - goto err_state; - } - return 0; - -err_state: - cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); -err_dst: - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); - return ret; -} - -int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr, unsigned long timeout_ms) -{ - struct rdma_id_private *id_priv = - container_of(id, struct rdma_id_private, id); - int ret; - - ret = resolve_prepare_src(id_priv, src_addr, dst_addr); - if (ret) - return ret; - - if (cma_any_addr(dst_addr)) { - ret = cma_resolve_loopback(id_priv); - } else { - if (dst_addr->sa_family == AF_IB) { - ret = cma_resolve_ib_addr(id_priv); - } else { - /* - * The FSM can return back to RDMA_CM_ADDR_BOUND after - * rdma_resolve_ip() is called, eg through the error - * path in addr_handler(). If this happens the existing - * request must be canceled before issuing a new one. - * Since canceling a request is a bit slow and this - * oddball path is rare, keep track once a request has - * been issued. The track turns out to be a permanent - * state since this is the only cancel as it is - * immediately before rdma_resolve_ip(). - */ - if (id_priv->used_resolve_ip) - rdma_addr_cancel(&id->route.addr.dev_addr); - else - id_priv->used_resolve_ip = 1; - ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, - &id->route.addr.dev_addr, - timeout_ms, addr_handler, - false, id_priv); - } - } - if (ret) - goto err; - - return 0; -err: - cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); - return ret; -} -EXPORT_SYMBOL(rdma_resolve_addr); - int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) { struct rdma_id_private *id_priv; @@ -4064,27 +3949,26 @@ err: } EXPORT_SYMBOL(rdma_listen); -int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +static int rdma_bind_addr_dst(struct rdma_id_private *id_priv, + struct sockaddr *addr, const struct sockaddr *daddr) { - struct rdma_id_private *id_priv; + struct sockaddr *id_daddr; int ret; - struct sockaddr *daddr; if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && addr->sa_family != AF_IB) return -EAFNOSUPPORT; - id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) return -EINVAL; - ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); + ret = cma_check_linklocal(&id_priv->id.route.addr.dev_addr, addr); if (ret) goto err1; memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); if (!cma_any_addr(addr)) { - ret = cma_translate_addr(addr, &id->route.addr.dev_addr); + ret = cma_translate_addr(addr, &id_priv->id.route.addr.dev_addr); if (ret) goto err1; @@ -4104,8 +3988,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) } #endif } - daddr = cma_dst_addr(id_priv); - daddr->sa_family = addr->sa_family; + id_daddr = cma_dst_addr(id_priv); + if (daddr != id_daddr) + memcpy(id_daddr, daddr, rdma_addr_size(addr)); + id_daddr->sa_family = addr->sa_family; ret = cma_get_port(id_priv); if (ret) @@ -4121,6 +4007,129 @@ err1: cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; } + +static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + struct sockaddr_storage zero_sock = {}; + + if (src_addr && src_addr->sa_family) + return rdma_bind_addr_dst(id_priv, src_addr, dst_addr); + + /* + * When the src_addr is not specified, automatically supply an any addr + */ + zero_sock.ss_family = dst_addr->sa_family; + if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { + struct sockaddr_in6 *src_addr6 = + (struct sockaddr_in6 *)&zero_sock; + struct sockaddr_in6 *dst_addr6 = + (struct sockaddr_in6 *)dst_addr; + + src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; + if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + id->route.addr.dev_addr.bound_dev_if = + dst_addr6->sin6_scope_id; + } else if (dst_addr->sa_family == AF_IB) { + ((struct sockaddr_ib *)&zero_sock)->sib_pkey = + ((struct sockaddr_ib *)dst_addr)->sib_pkey; + } + return rdma_bind_addr_dst(id_priv, (struct sockaddr *)&zero_sock, dst_addr); +} + +/* + * If required, resolve the source address for bind and leave the id_priv in + * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior + * calls made by ULP, a previously bound ID will not be re-bound and src_addr is + * ignored. + */ +static int resolve_prepare_src(struct rdma_id_private *id_priv, + struct sockaddr *src_addr, + const struct sockaddr *dst_addr) +{ + int ret; + + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + /* For a well behaved ULP state will be RDMA_CM_IDLE */ + ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); + if (ret) + return ret; + if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, + RDMA_CM_ADDR_QUERY))) + return -EINVAL; + + } else { + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); + } + + if (cma_family(id_priv) != dst_addr->sa_family) { + ret = -EINVAL; + goto err_state; + } + return 0; + +err_state: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); + return ret; +} + +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr, unsigned long timeout_ms) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + int ret; + + ret = resolve_prepare_src(id_priv, src_addr, dst_addr); + if (ret) + return ret; + + if (cma_any_addr(dst_addr)) { + ret = cma_resolve_loopback(id_priv); + } else { + if (dst_addr->sa_family == AF_IB) { + ret = cma_resolve_ib_addr(id_priv); + } else { + /* + * The FSM can return back to RDMA_CM_ADDR_BOUND after + * rdma_resolve_ip() is called, eg through the error + * path in addr_handler(). If this happens the existing + * request must be canceled before issuing a new one. + * Since canceling a request is a bit slow and this + * oddball path is rare, keep track once a request has + * been issued. The track turns out to be a permanent + * state since this is the only cancel as it is + * immediately before rdma_resolve_ip(). + */ + if (id_priv->used_resolve_ip) + rdma_addr_cancel(&id->route.addr.dev_addr); + else + id_priv->used_resolve_ip = 1; + ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, + &id->route.addr.dev_addr, + timeout_ms, addr_handler, + false, id_priv); + } + } + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_addr); + +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + return rdma_bind_addr_dst(id_priv, addr, cma_dst_addr(id_priv)); +} EXPORT_SYMBOL(rdma_bind_addr); static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h index e45264267bcc..47f3c6e4be89 100644 --- a/drivers/infiniband/core/cma_trace.h +++ b/drivers/infiniband/core/cma_trace.h @@ -15,7 +15,7 @@ #define _TRACE_RDMA_CMA_H #include -#include +#include DECLARE_EVENT_CLASS(cma_fsm_class, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 3a9b9a28d858..453188db39d8 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1730,7 +1730,7 @@ static int assign_client_id(struct ib_client *client) { int ret; - down_write(&clients_rwsem); + lockdep_assert_held(&clients_rwsem); /* * The add/remove callbacks must be called in FIFO/LIFO order. To * achieve this we assign client_ids so they are sorted in @@ -1739,14 +1739,11 @@ static int assign_client_id(struct ib_client *client) client->client_id = highest_client_id; ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL); if (ret) - goto out; + return ret; highest_client_id++; xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); - -out: - up_write(&clients_rwsem); - return ret; + return 0; } static void remove_client_id(struct ib_client *client) @@ -1776,25 +1773,35 @@ int ib_register_client(struct ib_client *client) { struct ib_device *device; unsigned long index; + bool need_unreg = false; int ret; refcount_set(&client->uses, 1); init_completion(&client->uses_zero); + + /* + * The devices_rwsem is held in write mode to ensure that a racing + * ib_register_device() sees a consisent view of clients and devices. + */ + down_write(&devices_rwsem); + down_write(&clients_rwsem); ret = assign_client_id(client); if (ret) - return ret; + goto out; - down_read(&devices_rwsem); + need_unreg = true; xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) { ret = add_client_context(device, client); - if (ret) { - up_read(&devices_rwsem); - ib_unregister_client(client); - return ret; - } + if (ret) + goto out; } - up_read(&devices_rwsem); - return 0; + ret = 0; +out: + up_write(&clients_rwsem); + up_write(&devices_rwsem); + if (need_unreg && ret) + ib_unregister_client(client); + return ret; } EXPORT_SYMBOL(ib_register_client); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index d96c78e436f9..5c284dfbe692 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -131,6 +131,11 @@ struct ib_umad_packet { struct ib_user_mad mad; }; +struct ib_rmpp_mad_hdr { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; +} __packed; + #define CREATE_TRACE_POINTS #include @@ -494,11 +499,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_umad_file *file = filp->private_data; + struct ib_rmpp_mad_hdr *rmpp_mad_hdr; struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct rdma_ah_attr ah_attr; struct ib_ah *ah; - struct ib_rmpp_mad *rmpp_mad; __be64 *tid; int ret, data_len, hdr_len, copy_offset, rmpp_active; u8 base_version; @@ -506,7 +511,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) return -EINVAL; - packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); + packet = kzalloc(sizeof(*packet) + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; @@ -560,13 +565,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err_up; } - rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; - hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); + rmpp_mad_hdr = (struct ib_rmpp_mad_hdr *)packet->mad.data; + hdr_len = ib_get_mad_data_offset(rmpp_mad_hdr->mad_hdr.mgmt_class); - if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) + if (ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class) && ib_mad_kernel_rmpp_agent(agent)) { copy_offset = IB_MGMT_RMPP_HDR; - rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + rmpp_active = ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE; } else { copy_offset = IB_MGMT_MAD_HDR; @@ -615,12 +620,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); - rmpp_mad->mad_hdr.tid = *tid; + rmpp_mad_hdr->mad_hdr.tid = *tid; } if (!ib_mad_kernel_rmpp_agent(agent) - && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) - && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { + && ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class) + && (ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { spin_lock_irq(&file->send_lock); list_add_tail(&packet->list, &file->send_list); spin_unlock_irq(&file->send_lock); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 4ed8814efde6..6ed0568747ea 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1710,7 +1710,7 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, switch (srq_attr_mask) { case IB_SRQ_MAX_WR: /* SRQ resize is not supported */ - break; + return -EINVAL; case IB_SRQ_LIMIT: /* Change the SRQ threshold */ if (srq_attr->srq_limit > srq->qplib_srq.max_wqe) @@ -1725,13 +1725,12 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, /* On success, update the shadow */ srq->srq_limit = srq_attr->srq_limit; /* No need to Build and send response back to udata */ - break; + return 0; default: ibdev_err(&rdev->ibdev, "Unsupported srq_attr_mask 0x%x", srq_attr_mask); return -EINVAL; } - return 0; } int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 51ae58c02b15..802b0e5801a7 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -2089,7 +2089,7 @@ int init_credit_return(struct hfi1_devdata *dd) "Unable to allocate credit return DMA range for NUMA %d\n", i); ret = -ENOMEM; - goto done; + goto free_cr_base; } } set_dev_node(&dd->pcidev->dev, dd->node); @@ -2097,6 +2097,10 @@ int init_credit_return(struct hfi1_devdata *dd) ret = 0; done: return ret; + +free_cr_base: + free_credit_return(dd); + goto done; } void free_credit_return(struct hfi1_devdata *dd) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 26c62162759b..969c5c3ab859 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -3158,7 +3158,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) { int rval = 0; - if ((unlikely(tx->num_desc + 1 == tx->desc_limit))) { + if ((unlikely(tx->num_desc == tx->desc_limit))) { rval = _extend_sdma_tx_descs(dd, tx); if (rval) { __sdma_txclean(dd, tx); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1112afa0af55..8748b65c87ea 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -595,6 +595,13 @@ struct hns_roce_work { u32 queue_num; }; +enum hns_roce_cong_type { + CONG_TYPE_DCQCN, + CONG_TYPE_LDCP, + CONG_TYPE_HC3, + CONG_TYPE_DIP, +}; + struct hns_roce_qp { struct ib_qp ibqp; struct hns_roce_wq rq; @@ -639,6 +646,7 @@ struct hns_roce_qp { struct list_head sq_node; /* all send qps are on a list */ struct hns_user_mmap_entry *dwqe_mmap_entry; u32 config; + enum hns_roce_cong_type cong_type; }; struct hns_roce_ib_iboe { @@ -710,13 +718,6 @@ struct hns_roce_eq_table { struct hns_roce_eq *eq; }; -enum cong_type { - CONG_TYPE_DCQCN, - CONG_TYPE_LDCP, - CONG_TYPE_HC3, - CONG_TYPE_DIP, -}; - struct hns_roce_caps { u64 fw_ver; u8 num_ports; @@ -847,7 +848,7 @@ struct hns_roce_caps { u16 default_aeq_period; u16 default_aeq_arm_st; u16 default_ceq_arm_st; - enum cong_type cong_type; + enum hns_roce_cong_type cong_type; }; enum hns_roce_device_state { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 58fbb1d3b7f4..d06b19e69a15 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4886,12 +4886,15 @@ static int check_cong_type(struct ib_qp *ibqp, struct hns_roce_congestion_algorithm *cong_alg) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - if (ibqp->qp_type == IB_QPT_UD) - hr_dev->caps.cong_type = CONG_TYPE_DCQCN; + if (ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_GSI) + hr_qp->cong_type = CONG_TYPE_DCQCN; + else + hr_qp->cong_type = hr_dev->caps.cong_type; /* different congestion types match different configurations */ - switch (hr_dev->caps.cong_type) { + switch (hr_qp->cong_type) { case CONG_TYPE_DCQCN: cong_alg->alg_sel = CONG_DCQCN; cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL; @@ -4919,8 +4922,8 @@ static int check_cong_type(struct ib_qp *ibqp, default: ibdev_warn(&hr_dev->ib_dev, "invalid type(%u) for congestion selection.\n", - hr_dev->caps.cong_type); - hr_dev->caps.cong_type = CONG_TYPE_DCQCN; + hr_qp->cong_type); + hr_qp->cong_type = CONG_TYPE_DCQCN; cong_alg->alg_sel = CONG_DCQCN; cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL; cong_alg->dip_vld = DIP_INVALID; @@ -4939,6 +4942,7 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr, struct hns_roce_congestion_algorithm cong_field; struct ib_device *ibdev = ibqp->device; struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); u32 dip_idx = 0; int ret; @@ -4951,7 +4955,7 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr, return ret; hr_reg_write(context, QPC_CONG_ALGO_TMPL_ID, hr_dev->cong_algo_tmpl_id + - hr_dev->caps.cong_type * HNS_ROCE_CONG_SIZE); + hr_qp->cong_type * HNS_ROCE_CONG_SIZE); hr_reg_clear(qpc_mask, QPC_CONG_ALGO_TMPL_ID); hr_reg_write(&context->ext, QPCEX_CONG_ALG_SEL, cong_field.alg_sel); hr_reg_clear(&qpc_mask->ext, QPCEX_CONG_ALG_SEL); diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index ad54260cb58c..ebe98fa2b1cd 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -345,6 +345,7 @@ enum irdma_cqp_op_type { #define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b #define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c #define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e +#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f #define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520 #define IRDMA_AE_RESET_SENT 0x0601 #define IRDMA_AE_TERMINATE_SENT 0x0602 diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 311a1138e838..918a2d783141 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -379,6 +379,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) case IRDMA_AE_LLP_TOO_MANY_RETRIES: case IRDMA_AE_LCE_QP_CATASTROPHIC: case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC: + case IRDMA_AE_LLP_TOO_MANY_RNRS: case IRDMA_AE_LCE_CQ_CATASTROPHIC: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: default: @@ -562,6 +563,13 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx); irq_update_affinity_hint(msix_vec->irq, NULL); free_irq(msix_vec->irq, dev_id); + if (rf == dev_id) { + tasklet_kill(&rf->dpc_tasklet); + } else { + struct irdma_ceq *iwceq = (struct irdma_ceq *)dev_id; + + tasklet_kill(&iwceq->dpc_tasklet); + } } /** diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 280d633d4ec4..d691cdef5e9a 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1414,6 +1414,78 @@ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp, IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->initial_ring, move_cnt); } +/** + * irdma_uk_calc_shift_wq - calculate WQE shift for both SQ and RQ + * @ukinfo: qp initialization info + * @sq_shift: Returns shift of SQ + * @rq_shift: Returns shift of RQ + */ +void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, + u8 *rq_shift) +{ + bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2; + + irdma_get_wqe_shift(ukinfo->uk_attrs, + imm_support ? ukinfo->max_sq_frag_cnt + 1 : + ukinfo->max_sq_frag_cnt, + ukinfo->max_inline_data, sq_shift); + + irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, + rq_shift); + + if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { + if (ukinfo->abi_ver > 4) + *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; + } +} + +/** + * irdma_uk_calc_depth_shift_sq - calculate depth and shift for SQ size. + * @ukinfo: qp initialization info + * @sq_depth: Returns depth of SQ + * @sq_shift: Returns shift of SQ + */ +int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, + u32 *sq_depth, u8 *sq_shift) +{ + bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2; + int status; + + irdma_get_wqe_shift(ukinfo->uk_attrs, + imm_support ? ukinfo->max_sq_frag_cnt + 1 : + ukinfo->max_sq_frag_cnt, + ukinfo->max_inline_data, sq_shift); + status = irdma_get_sqdepth(ukinfo->uk_attrs, ukinfo->sq_size, + *sq_shift, sq_depth); + + return status; +} + +/** + * irdma_uk_calc_depth_shift_rq - calculate depth and shift for RQ size. + * @ukinfo: qp initialization info + * @rq_depth: Returns depth of RQ + * @rq_shift: Returns shift of RQ + */ +int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, + u32 *rq_depth, u8 *rq_shift) +{ + int status; + + irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, + rq_shift); + + if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { + if (ukinfo->abi_ver > 4) + *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; + } + + status = irdma_get_rqdepth(ukinfo->uk_attrs, ukinfo->rq_size, + *rq_shift, rq_depth); + + return status; +} + /** * irdma_uk_qp_init - initialize shared qp * @qp: hw qp (user and kernel) @@ -1428,23 +1500,12 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) { int ret_code = 0; u32 sq_ring_size; - u8 sqshift, rqshift; qp->uk_attrs = info->uk_attrs; if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags || info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags) return -EINVAL; - irdma_get_wqe_shift(qp->uk_attrs, info->max_rq_frag_cnt, 0, &rqshift); - if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) { - irdma_get_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt, - info->max_inline_data, &sqshift); - if (info->abi_ver > 4) - rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; - } else { - irdma_get_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt + 1, - info->max_inline_data, &sqshift); - } qp->qp_caps = info->qp_caps; qp->sq_base = info->sq; qp->rq_base = info->rq; @@ -1458,7 +1519,7 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) qp->sq_size = info->sq_size; qp->push_mode = false; qp->max_sq_frag_cnt = info->max_sq_frag_cnt; - sq_ring_size = qp->sq_size << sqshift; + sq_ring_size = qp->sq_size << info->sq_shift; IRDMA_RING_INIT(qp->sq_ring, sq_ring_size); IRDMA_RING_INIT(qp->initial_ring, sq_ring_size); if (info->first_sq_wq) { @@ -1473,9 +1534,9 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) qp->rq_size = info->rq_size; qp->max_rq_frag_cnt = info->max_rq_frag_cnt; qp->max_inline_data = info->max_inline_data; - qp->rq_wqe_size = rqshift; + qp->rq_wqe_size = info->rq_shift; IRDMA_RING_INIT(qp->rq_ring, qp->rq_size); - qp->rq_wqe_size_multiplier = 1 << rqshift; + qp->rq_wqe_size_multiplier = 1 << info->rq_shift; if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) qp->wqe_ops = iw_wqe_uk_ops_gen_1; else diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index d0cdf609f5e0..1e0e1a71dbad 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -295,6 +295,12 @@ void irdma_uk_cq_init(struct irdma_cq_uk *cq, struct irdma_cq_uk_init_info *info); int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info); +void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, + u8 *rq_shift); +int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, + u32 *sq_depth, u8 *sq_shift); +int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, + u32 *rq_depth, u8 *rq_shift); struct irdma_sq_uk_wr_trk_info { u64 wrid; u32 wr_len; @@ -374,8 +380,12 @@ struct irdma_qp_uk_init_info { u32 max_sq_frag_cnt; u32 max_rq_frag_cnt; u32 max_inline_data; + u32 sq_depth; + u32 rq_depth; u8 first_sq_wq; u8 type; + u8 sq_shift; + u8 rq_shift; int abi_ver; bool legacy_mode; }; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 447e1bcc82a3..76c5f461faca 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -277,7 +277,7 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, struct irdma_alloc_ucontext_req req = {}; struct irdma_alloc_ucontext_resp uresp = {}; struct irdma_ucontext *ucontext = to_ucontext(uctx); - struct irdma_uk_attrs *uk_attrs; + struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN || udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN) @@ -292,7 +292,9 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, ucontext->iwdev = iwdev; ucontext->abi_ver = req.userspace_ver; - uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; + if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR) + ucontext->use_raw_attrs = true; + /* GEN_1 legacy support with libi40iw */ if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) { if (uk_attrs->hw_rev != IRDMA_GEN_1) @@ -327,6 +329,7 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, uresp.max_hw_cq_size = uk_attrs->max_hw_cq_size; uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size; uresp.hw_rev = uk_attrs->hw_rev; + uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { rdma_user_mmap_entry_remove(ucontext->db_mmap_entry); @@ -566,6 +569,86 @@ static void irdma_setup_virt_qp(struct irdma_device *iwdev, } } +/** + * irdma_setup_umode_qp - setup sq and rq size in user mode qp + * @iwdev: iwarp device + * @iwqp: qp ptr (user or kernel) + * @info: initialize info to return + * @init_attr: Initial QP create attributes + */ +static int irdma_setup_umode_qp(struct ib_udata *udata, + struct irdma_device *iwdev, + struct irdma_qp *iwqp, + struct irdma_qp_init_info *info, + struct ib_qp_init_attr *init_attr) +{ + struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(udata, + struct irdma_ucontext, ibucontext); + struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; + struct irdma_create_qp_req req; + unsigned long flags; + int ret; + + ret = ib_copy_from_udata(&req, udata, + min(sizeof(req), udata->inlen)); + if (ret) { + ibdev_dbg(&iwdev->ibdev, "VERBS: ib_copy_from_data fail\n"); + return ret; + } + + iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; + iwqp->user_mode = 1; + if (req.user_wqe_bufs) { + info->qp_uk_init_info.legacy_mode = ucontext->legacy_mode; + spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); + iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs, + &ucontext->qp_reg_mem_list); + spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); + + if (!iwqp->iwpbl) { + ret = -ENODATA; + ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n"); + return ret; + } + } + + if (!ucontext->use_raw_attrs) { + /** + * Maintain backward compat with older ABI which passes sq and + * rq depth in quanta in cap.max_send_wr and cap.max_recv_wr. + * There is no way to compute the correct value of + * iwqp->max_send_wr/max_recv_wr in the kernel. + */ + iwqp->max_send_wr = init_attr->cap.max_send_wr; + iwqp->max_recv_wr = init_attr->cap.max_recv_wr; + ukinfo->sq_size = init_attr->cap.max_send_wr; + ukinfo->rq_size = init_attr->cap.max_recv_wr; + irdma_uk_calc_shift_wq(ukinfo, &ukinfo->sq_shift, + &ukinfo->rq_shift); + } else { + ret = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, + &ukinfo->sq_shift); + if (ret) + return ret; + + ret = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, + &ukinfo->rq_shift); + if (ret) + return ret; + + iwqp->max_send_wr = + (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; + iwqp->max_recv_wr = + (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; + ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; + ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; + } + + irdma_setup_virt_qp(iwdev, iwqp, info); + + return 0; +} + /** * irdma_setup_kmode_qp - setup initialization for kernel mode qp * @iwdev: iwarp device @@ -579,40 +662,28 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, struct ib_qp_init_attr *init_attr) { struct irdma_dma_mem *mem = &iwqp->kqp.dma_mem; - u32 sqdepth, rqdepth; - u8 sqshift, rqshift; u32 size; int status; struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; - struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; - irdma_get_wqe_shift(uk_attrs, - uk_attrs->hw_rev >= IRDMA_GEN_2 ? ukinfo->max_sq_frag_cnt + 1 : - ukinfo->max_sq_frag_cnt, - ukinfo->max_inline_data, &sqshift); - status = irdma_get_sqdepth(uk_attrs, ukinfo->sq_size, sqshift, - &sqdepth); + status = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, + &ukinfo->sq_shift); if (status) return status; - if (uk_attrs->hw_rev == IRDMA_GEN_1) - rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; - else - irdma_get_wqe_shift(uk_attrs, ukinfo->max_rq_frag_cnt, 0, - &rqshift); - - status = irdma_get_rqdepth(uk_attrs, ukinfo->rq_size, rqshift, - &rqdepth); + status = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, + &ukinfo->rq_shift); if (status) return status; iwqp->kqp.sq_wrid_mem = - kcalloc(sqdepth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); + kcalloc(ukinfo->sq_depth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); if (!iwqp->kqp.sq_wrid_mem) return -ENOMEM; iwqp->kqp.rq_wrid_mem = - kcalloc(rqdepth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL); + kcalloc(ukinfo->rq_depth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL); + if (!iwqp->kqp.rq_wrid_mem) { kfree(iwqp->kqp.sq_wrid_mem); iwqp->kqp.sq_wrid_mem = NULL; @@ -622,7 +693,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, ukinfo->sq_wrtrk_array = iwqp->kqp.sq_wrid_mem; ukinfo->rq_wrid_array = iwqp->kqp.rq_wrid_mem; - size = (sqdepth + rqdepth) * IRDMA_QP_WQE_MIN_SIZE; + size = (ukinfo->sq_depth + ukinfo->rq_depth) * IRDMA_QP_WQE_MIN_SIZE; size += (IRDMA_SHADOW_AREA_SIZE << 3); mem->size = ALIGN(size, 256); @@ -638,16 +709,18 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, ukinfo->sq = mem->va; info->sq_pa = mem->pa; - ukinfo->rq = &ukinfo->sq[sqdepth]; - info->rq_pa = info->sq_pa + (sqdepth * IRDMA_QP_WQE_MIN_SIZE); - ukinfo->shadow_area = ukinfo->rq[rqdepth].elem; - info->shadow_area_pa = info->rq_pa + (rqdepth * IRDMA_QP_WQE_MIN_SIZE); - ukinfo->sq_size = sqdepth >> sqshift; - ukinfo->rq_size = rqdepth >> rqshift; - ukinfo->qp_id = iwqp->ibqp.qp_num; + ukinfo->rq = &ukinfo->sq[ukinfo->sq_depth]; + info->rq_pa = info->sq_pa + (ukinfo->sq_depth * IRDMA_QP_WQE_MIN_SIZE); + ukinfo->shadow_area = ukinfo->rq[ukinfo->rq_depth].elem; + info->shadow_area_pa = + info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE); + ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; + ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; - init_attr->cap.max_send_wr = (sqdepth - IRDMA_SQ_RSVD) >> sqshift; - init_attr->cap.max_recv_wr = (rqdepth - IRDMA_RQ_RSVD) >> rqshift; + iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; + iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; + init_attr->cap.max_send_wr = iwqp->max_send_wr; + init_attr->cap.max_recv_wr = iwqp->max_recv_wr; return 0; } @@ -762,7 +835,9 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr, if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline || init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || - init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags) + init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags || + init_attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta || + init_attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta) return -EINVAL; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { @@ -803,18 +878,14 @@ static int irdma_create_qp(struct ib_qp *ibqp, struct irdma_device *iwdev = to_iwdev(ibpd->device); struct irdma_pci_f *rf = iwdev->rf; struct irdma_qp *iwqp = to_iwqp(ibqp); - struct irdma_create_qp_req req = {}; struct irdma_create_qp_resp uresp = {}; u32 qp_num = 0; int err_code; - int sq_size; - int rq_size; struct irdma_sc_qp *qp; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs; struct irdma_qp_init_info init_info = {}; struct irdma_qp_host_ctx_info *ctx_info; - unsigned long flags; err_code = irdma_validate_qp_attrs(init_attr, iwdev); if (err_code) @@ -824,13 +895,10 @@ static int irdma_create_qp(struct ib_qp *ibqp, udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN)) return -EINVAL; - sq_size = init_attr->cap.max_send_wr; - rq_size = init_attr->cap.max_recv_wr; - init_info.vsi = &iwdev->vsi; init_info.qp_uk_init_info.uk_attrs = uk_attrs; - init_info.qp_uk_init_info.sq_size = sq_size; - init_info.qp_uk_init_info.rq_size = rq_size; + init_info.qp_uk_init_info.sq_size = init_attr->cap.max_send_wr; + init_info.qp_uk_init_info.rq_size = init_attr->cap.max_recv_wr; init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge; init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge; init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data; @@ -872,7 +940,7 @@ static int irdma_create_qp(struct ib_qp *ibqp, iwqp->host_ctx.size = IRDMA_QP_CTX_SIZE; init_info.pd = &iwpd->sc_pd; - init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num; + init_info.qp_uk_init_info.qp_id = qp_num; if (!rdma_protocol_roce(&iwdev->ibdev, 1)) init_info.qp_uk_init_info.first_sq_wq = 1; iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp; @@ -880,36 +948,9 @@ static int irdma_create_qp(struct ib_qp *ibqp, init_waitqueue_head(&iwqp->mod_qp_waitq); if (udata) { - err_code = ib_copy_from_udata(&req, udata, - min(sizeof(req), udata->inlen)); - if (err_code) { - ibdev_dbg(&iwdev->ibdev, - "VERBS: ib_copy_from_data fail\n"); - goto error; - } - - iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; - iwqp->user_mode = 1; - if (req.user_wqe_bufs) { - struct irdma_ucontext *ucontext = - rdma_udata_to_drv_context(udata, - struct irdma_ucontext, - ibucontext); - - init_info.qp_uk_init_info.legacy_mode = ucontext->legacy_mode; - spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); - iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs, - &ucontext->qp_reg_mem_list); - spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); - - if (!iwqp->iwpbl) { - err_code = -ENODATA; - ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n"); - goto error; - } - } init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; - irdma_setup_virt_qp(iwdev, iwqp, &init_info); + err_code = irdma_setup_umode_qp(udata, iwdev, iwqp, &init_info, + init_attr); } else { INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker); init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; @@ -964,8 +1005,6 @@ static int irdma_create_qp(struct ib_qp *ibqp, spin_lock_init(&iwqp->sc_qp.pfpdu.lock); iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; rf->qp_table[qp_num] = iwqp; - iwqp->max_send_wr = sq_size; - iwqp->max_recv_wr = rq_size; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { if (dev->ws_add(&iwdev->vsi, 0)) { @@ -986,8 +1025,8 @@ static int irdma_create_qp(struct ib_qp *ibqp, if (rdma_protocol_iwarp(&iwdev->ibdev, 1)) uresp.lsmm = 1; } - uresp.actual_sq_size = sq_size; - uresp.actual_rq_size = rq_size; + uresp.actual_sq_size = init_info.qp_uk_init_info.sq_size; + uresp.actual_rq_size = init_info.qp_uk_init_info.rq_size; uresp.qp_id = qp_num; uresp.qp_caps = qp->qp_uk.qp_caps; @@ -2119,9 +2158,8 @@ static int irdma_create_cq(struct ib_cq *ibcq, info.cq_base_pa = iwcq->kmem.pa; } - if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) - info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, - (u32)IRDMA_MAX_CQ_READ_THRESH); + info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, + (u32)IRDMA_MAX_CQ_READ_THRESH); if (irdma_sc_cq_init(cq, &info)) { ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n"); @@ -2825,7 +2863,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, iwmr->ibmr.pd = pd; iwmr->ibmr.device = pd->device; iwmr->ibmr.iova = virt; - iwmr->page_size = PAGE_SIZE; + iwmr->page_size = SZ_4K; if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) { iwmr->page_size = ib_umem_find_best_pgsz(region, @@ -2845,6 +2883,13 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, switch (req.reg_type) { case IRDMA_MEMREG_TYPE_QP: + /* iWarp: Catch page not starting on OS page boundary */ + if (!rdma_protocol_roce(&iwdev->ibdev, 1) && + ib_umem_offset(iwmr->region)) { + err = -EINVAL; + goto error; + } + total = req.sq_pages + req.rq_pages + shadow_pgcnt; if (total > iwmr->page_cnt) { err = -EINVAL; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index 9f9e273bbff3..0bc0d0faa086 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -18,7 +18,8 @@ struct irdma_ucontext { struct list_head qp_reg_mem_list; spinlock_t qp_reg_mem_list_lock; /* protect QP memory list */ int abi_ver; - bool legacy_mode; + u8 legacy_mode : 1; + u8 use_raw_attrs : 1; }; struct irdma_pd { diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index f8e2baed27a5..7013ce20549b 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2951,7 +2951,7 @@ DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_MODIFY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, UVERBS_IDR_ANY_OBJECT, - UVERBS_ACCESS_WRITE, + UVERBS_ACCESS_READ, UA_MANDATORY), UVERBS_ATTR_PTR_IN( MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index 855f3f4fefad..737db67a9ce1 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -78,7 +78,7 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, */ copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, left); - memcpy(eseg->inline_hdr.start, pdata, copysz); + memcpy(eseg->inline_hdr.data, pdata, copysz); stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - sizeof(eseg->inline_hdr.start) + copysz, 16); *size += stride / 16; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index d745ce9dc88a..61755b5f3e20 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1879,8 +1879,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev, /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, ureq.rq_len, true, 0, alloc_and_init); - if (rc) + if (rc) { + ib_umem_release(qp->usq.umem); + qp->usq.umem = NULL; + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, + qp->usq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + } return rc; + } } memset(&in_params, 0, sizeof(in_params)); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 5b3154503bf4..319d4288eddd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -531,21 +531,18 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) rec.join_state = SENDONLY_FULLMEMBER_JOIN; } - spin_unlock_irq(&priv->lock); multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, - &rec, comp_mask, GFP_KERNEL, + &rec, comp_mask, GFP_ATOMIC, ipoib_mcast_join_complete, mcast); - spin_lock_irq(&priv->lock); if (IS_ERR(multicast)) { ret = PTR_ERR(multicast); ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); /* Requeue this join task with a backoff delay */ __ipoib_mcast_schedule_join_thread(priv, mcast, 1); clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); - spin_unlock_irq(&priv->lock); complete(&mcast->done); - spin_lock_irq(&priv->lock); + return ret; } return 0; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index d3c436ead694..4aa80c9388f0 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -133,7 +133,7 @@ static ssize_t mpath_policy_store(struct device *dev, /* distinguish "mi" and "min-latency" with length */ len = strnlen(buf, NAME_MAX); - if (buf[len - 1] == '\n') + if (len && buf[len - 1] == '\n') len--; if (!strncasecmp(buf, "round-robin", 11) || diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 25e799dba999..fd6c260d5857 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -79,12 +79,16 @@ module_param(srpt_srq_size, int, 0444); MODULE_PARM_DESC(srpt_srq_size, "Shared receive queue (SRQ) size."); +static int srpt_set_u64_x(const char *buffer, const struct kernel_param *kp) +{ + return kstrtou64(buffer, 16, (u64 *)kp->arg); +} static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg); } -module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, - 0444); +module_param_call(srpt_service_guid, srpt_set_u64_x, srpt_get_u64_x, + &srpt_service_guid, 0444); MODULE_PARM_DESC(srpt_service_guid, "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA."); @@ -210,10 +214,12 @@ static const char *get_ch_state_name(enum rdma_ch_state s) /** * srpt_qp_event - QP event callback function * @event: Description of the event that occurred. - * @ch: SRPT RDMA channel. + * @ptr: SRPT RDMA channel. */ -static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) +static void srpt_qp_event(struct ib_event *event, void *ptr) { + struct srpt_rdma_ch *ch = ptr; + pr_debug("QP event %d on ch=%p sess_name=%s-%d state=%s\n", event->event, ch, ch->sess_name, ch->qp->qp_num, get_ch_state_name(ch->state)); @@ -1807,8 +1813,7 @@ retry: ch->cq_size = ch->rq_size + sq_size; qp_init->qp_context = (void *)ch; - qp_init->event_handler - = (void(*)(struct ib_event *, void*))srpt_qp_event; + qp_init->event_handler = srpt_qp_event; qp_init->send_cq = ch->cq; qp_init->recv_cq = ch->cq; qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; @@ -3204,7 +3209,6 @@ static int srpt_add_one(struct ib_device *device) INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device, srpt_event_handler); - ib_register_event_handler(&sdev->event_handler); for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; @@ -3227,6 +3231,7 @@ static int srpt_add_one(struct ib_device *device) } } + ib_register_event_handler(&sdev->event_handler); spin_lock(&srpt_dev_lock); list_add_tail(&sdev->list, &srpt_dev_list); spin_unlock(&srpt_dev_lock); @@ -3237,7 +3242,6 @@ static int srpt_add_one(struct ib_device *device) err_port: srpt_unregister_mad_agent(sdev, i); - ib_unregister_event_handler(&sdev->event_handler); err_cm: if (sdev->cm_id) ib_destroy_cm_id(sdev->cm_id); diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index e8011d70d079..13c36f51b935 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -294,6 +294,7 @@ static const struct xpad_device { { 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, + { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, @@ -489,6 +490,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x15e4), /* Numark X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x162e), /* Joytech X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ + XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ XPAD_XBOX360_VENDOR(0x1bad), /* Harminix Rock Band Guitar and Drums */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA Controllers */ @@ -562,6 +564,9 @@ struct xboxone_init_packet { #define GIP_MOTOR_LT BIT(3) #define GIP_MOTOR_ALL (GIP_MOTOR_R | GIP_MOTOR_L | GIP_MOTOR_RT | GIP_MOTOR_LT) +#define GIP_WIRED_INTF_DATA 0 +#define GIP_WIRED_INTF_AUDIO 1 + /* * This packet is required for all Xbox One pads with 2015 * or later firmware installed (or present from the factory). @@ -2006,7 +2011,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id } if (xpad->xtype == XTYPE_XBOXONE && - intf->cur_altsetting->desc.bInterfaceNumber != 0) { + intf->cur_altsetting->desc.bInterfaceNumber != GIP_WIRED_INTF_DATA) { /* * The Xbox One controller lists three interfaces all with the * same interface class, subclass and protocol. Differentiate by diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index c4d8caadec59..661d6c8b059b 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -792,7 +792,6 @@ static int atkbd_probe(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; unsigned char param[2]; - bool skip_getid; /* * Some systems, where the bit-twiddling when testing the io-lines of the @@ -806,6 +805,11 @@ static int atkbd_probe(struct atkbd *atkbd) "keyboard reset failed on %s\n", ps2dev->serio->phys); + if (atkbd_skip_getid(atkbd)) { + atkbd->id = 0xab83; + return 0; + } + /* * Then we check the keyboard ID. We should get 0xab83 under normal conditions. * Some keyboards report different values, but the first byte is always 0xab or @@ -814,18 +818,17 @@ static int atkbd_probe(struct atkbd *atkbd) */ param[0] = param[1] = 0xa5; /* initialize with invalid values */ - skip_getid = atkbd_skip_getid(atkbd); - if (skip_getid || ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { + if (ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { /* - * If the get ID command was skipped or failed, we check if we can at least set + * If the get ID command failed, we check if we can at least set * the LEDs on the keyboard. This should work on every keyboard out there. * It also turns the LEDs off, which we want anyway. */ param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS)) return -1; - atkbd->id = skip_getid ? 0xab83 : 0xabba; + atkbd->id = 0xabba; return 0; } diff --git a/drivers/input/keyboard/gpio_keys_polled.c b/drivers/input/keyboard/gpio_keys_polled.c index c3937d2fc744..a0f9978c68f5 100644 --- a/drivers/input/keyboard/gpio_keys_polled.c +++ b/drivers/input/keyboard/gpio_keys_polled.c @@ -319,12 +319,10 @@ static int gpio_keys_polled_probe(struct platform_device *pdev) error = devm_gpio_request_one(dev, button->gpio, flags, button->desc ? : DRV_NAME); - if (error) { - dev_err(dev, - "unable to claim gpio %u, err=%d\n", - button->gpio, error); - return error; - } + if (error) + return dev_err_probe(dev, error, + "unable to claim gpio %u\n", + button->gpio); bdata->gpiod = gpio_to_desc(button->gpio); if (!bdata->gpiod) { diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index b585b1dab870..dfc6c581873b 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -634,6 +634,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOAUX) }, + { + /* Fujitsu Lifebook U728 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U728"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, { /* Gigabyte M912 */ .matches = { @@ -1208,6 +1216,12 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NS5x_7xPU"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"), diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 3f0732db7bf5..6de64b3f900f 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -884,7 +884,8 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) } } - if (ts->gpio_count == 2 && ts->gpio_int_idx == 0) { + /* Some devices with gpio_int_idx 0 list a third unused GPIO */ + if ((ts->gpio_count == 2 || ts->gpio_count == 3) && ts->gpio_int_idx == 0) { ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_GPIO; gpio_mapping = acpi_goodix_int_first_gpios; } else if (ts->gpio_count == 2 && ts->gpio_int_idx == 1) { diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 1d9494f64a21..4526ff2e1bd5 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -29,7 +29,6 @@ static LIST_HEAD(icc_providers); static int providers_count; static bool synced_state; static DEFINE_MUTEX(icc_lock); -static DEFINE_MUTEX(icc_bw_lock); static struct dentry *icc_debugfs_dir; static void icc_summary_show_one(struct seq_file *s, struct icc_node *n) @@ -636,7 +635,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) if (WARN_ON(IS_ERR(path) || !path->num_nodes)) return -EINVAL; - mutex_lock(&icc_bw_lock); + mutex_lock(&icc_lock); old_avg = path->reqs[0].avg_bw; old_peak = path->reqs[0].peak_bw; @@ -668,7 +667,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) apply_constraints(path); } - mutex_unlock(&icc_bw_lock); + mutex_unlock(&icc_lock); trace_icc_set_bw_end(path, ret); @@ -971,7 +970,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) return; mutex_lock(&icc_lock); - mutex_lock(&icc_bw_lock); node->provider = provider; list_add_tail(&node->node_list, &provider->nodes); @@ -997,7 +995,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) node->avg_bw = 0; node->peak_bw = 0; - mutex_unlock(&icc_bw_lock); mutex_unlock(&icc_lock); } EXPORT_SYMBOL_GPL(icc_node_add); @@ -1137,7 +1134,6 @@ void icc_sync_state(struct device *dev) return; mutex_lock(&icc_lock); - mutex_lock(&icc_bw_lock); synced_state = true; list_for_each_entry(p, &icc_providers, provider_list) { dev_dbg(p->dev, "interconnect provider is in synced state\n"); @@ -1150,21 +1146,13 @@ void icc_sync_state(struct device *dev) } } } - mutex_unlock(&icc_bw_lock); mutex_unlock(&icc_lock); } EXPORT_SYMBOL_GPL(icc_sync_state); static int __init icc_init(void) { - struct device_node *root; - - /* Teach lockdep about lock ordering wrt. shrinker: */ - fs_reclaim_acquire(GFP_KERNEL); - might_lock(&icc_bw_lock); - fs_reclaim_release(GFP_KERNEL); - - root = of_find_node_by_path("/"); + struct device_node *root = of_find_node_by_path("/"); providers_count = of_count_icc_providers(root); of_node_put(root); diff --git a/drivers/interconnect/qcom/sc8180x.c b/drivers/interconnect/qcom/sc8180x.c index 83461e31774e..d9ee193fb18b 100644 --- a/drivers/interconnect/qcom/sc8180x.c +++ b/drivers/interconnect/qcom/sc8180x.c @@ -1387,6 +1387,7 @@ static struct qcom_icc_bcm bcm_mm0 = { static struct qcom_icc_bcm bcm_co0 = { .name = "CO0", + .keepalive = true, .num_nodes = 1, .nodes = { &slv_qns_cdsp_mem_noc } }; diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index af63be3e4fce..dd1949cf5f10 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -192,7 +192,7 @@ source "drivers/iommu/intel/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI - select DMAR_TABLE + select DMAR_TABLE if INTEL_IOMMU help Supports Interrupt remapping for IO-APIC and MSI devices. To use x2apic mode in the CPU's which support x2APIC enhancements or diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index f6e64c985802..cc94ac666233 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2047,6 +2047,9 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) /* Prevent binding other PCI device drivers to IOMMU devices */ iommu->dev->match_driver = false; + /* ACPI _PRT won't have an IRQ for IOMMU */ + iommu->dev->irq_managed = 1; + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, &iommu->cap); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8966f7d5aab6..82f100e591b5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -152,6 +152,18 @@ static void queue_inc_cons(struct arm_smmu_ll_queue *q) q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); } +static void queue_sync_cons_ovf(struct arm_smmu_queue *q) +{ + struct arm_smmu_ll_queue *llq = &q->llq; + + if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons))) + return; + + llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | + Q_IDX(llq, llq->cons); + queue_sync_cons_out(q); +} + static int queue_sync_prod_in(struct arm_smmu_queue *q) { u32 prod; @@ -1583,8 +1595,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) } while (!queue_empty(llq)); /* Sync our overflow flag, as we believe we're up to speed */ - llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | - Q_IDX(llq, llq->cons); + queue_sync_cons_ovf(q); return IRQ_HANDLED; } @@ -1642,9 +1653,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) } while (!queue_empty(llq)); /* Sync our overflow flag, as we believe we're up to speed */ - llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | - Q_IDX(llq, llq->cons); - queue_sync_cons_out(q); + queue_sync_cons_ovf(q); return IRQ_HANDLED; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index d80065c8105a..f15dcb9e4175 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -267,12 +267,26 @@ static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain, static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) { - unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1); struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + unsigned int last_s2cr; u32 reg; u32 smr; int i; + /* + * Some platforms support more than the Arm SMMU architected maximum of + * 128 stream matching groups. For unknown reasons, the additional + * groups don't exhibit the same behavior as the architected registers, + * so limit the groups to 128 until the behavior is fixed for the other + * groups. + */ + if (smmu->num_mapping_groups > 128) { + dev_notice(smmu->dev, "\tLimiting the stream matching groups to 128\n"); + smmu->num_mapping_groups = 128; + } + + last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1); + /* * With some firmware versions writes to S2CR of type FAULT are * ignored, and writing BYPASS will end up written as FAULT in the diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index aa6d62cc567a..3fa66dba0a32 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1547,6 +1547,14 @@ static size_t iommu_dma_opt_mapping_size(void) return iova_rcache_range(); } +static size_t iommu_dma_max_mapping_size(struct device *dev) +{ + if (dev_is_untrusted(dev)) + return swiotlb_max_mapping_size(dev); + + return SIZE_MAX; +} + static const struct dma_map_ops iommu_dma_ops = { .flags = DMA_F_PCI_P2PDMA_SUPPORTED, .alloc = iommu_dma_alloc, @@ -1569,6 +1577,7 @@ static const struct dma_map_ops iommu_dma_ops = { .unmap_resource = iommu_dma_unmap_resource, .get_merge_boundary = iommu_dma_get_merge_boundary, .opt_mapping_size = iommu_dma_opt_mapping_size, + .max_mapping_size = iommu_dma_max_mapping_size, }; /* diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index b7dff5092fd2..12e1e90fdae1 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -96,4 +96,15 @@ config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON passing intel_iommu=sm_on to the kernel. If not sure, please use the default value. +config INTEL_IOMMU_PERF_EVENTS + def_bool y + bool "Intel IOMMU performance events" + depends on INTEL_IOMMU && PERF_EVENTS + help + Selecting this option will enable the performance monitoring + infrastructure in the Intel IOMMU. It collects information about + key events occurring during operation of the remapping hardware, + to aid performance tuning and debug. These are available on modern + processors which support Intel VT-d 4.0 and later. + endif # INTEL_IOMMU diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index fa0dae16441c..29d26a437132 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -5,4 +5,7 @@ obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o +ifdef CONFIG_INTEL_IOMMU obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o +endif +obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 418af1db0192..4759f79ad7b9 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -34,6 +34,7 @@ #include "../irq_remapping.h" #include "perf.h" #include "trace.h" +#include "perfmon.h" typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *); struct dmar_res_callback { @@ -1104,6 +1105,9 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) if (sts & DMA_GSTS_QIES) iommu->gcmd |= DMA_GCMD_QIE; + if (alloc_iommu_pmu(iommu)) + pr_debug("Cannot alloc PMU for iommu (seq_id = %d)\n", iommu->seq_id); + raw_spin_lock_init(&iommu->register_lock); /* @@ -1131,6 +1135,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) err_sysfs: iommu_device_sysfs_remove(&iommu->iommu); err_unmap: + free_iommu_pmu(iommu); unmap_iommu(iommu); error_free_seq_id: ida_free(&dmar_seq_ids, iommu->seq_id); @@ -1146,6 +1151,8 @@ static void free_iommu(struct intel_iommu *iommu) iommu_device_sysfs_remove(&iommu->iommu); } + free_iommu_pmu(iommu); + if (iommu->irq) { if (iommu->pr_irq) { free_irq(iommu->pr_irq, iommu); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index c99cb715bd9a..c1348bedab3b 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -125,6 +125,11 @@ #define DMAR_MTRR_PHYSMASK8_REG 0x208 #define DMAR_MTRR_PHYSBASE9_REG 0x210 #define DMAR_MTRR_PHYSMASK9_REG 0x218 +#define DMAR_PERFCAP_REG 0x300 +#define DMAR_PERFCFGOFF_REG 0x310 +#define DMAR_PERFOVFOFF_REG 0x318 +#define DMAR_PERFCNTROFF_REG 0x31c +#define DMAR_PERFEVNTCAP_REG 0x380 #define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */ #define DMAR_VCMD_REG 0xe00 /* Virtual command register */ #define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */ @@ -148,6 +153,7 @@ */ #define cap_esrtps(c) (((c) >> 63) & 1) #define cap_esirtps(c) (((c) >> 62) & 1) +#define cap_ecmds(c) (((c) >> 61) & 1) #define cap_fl5lp_support(c) (((c) >> 60) & 1) #define cap_pi_support(c) (((c) >> 59) & 1) #define cap_fl1gp_support(c) (((c) >> 56) & 1) @@ -179,7 +185,8 @@ * Extended Capability Register */ -#define ecap_rps(e) (((e) >> 49) & 0x1) +#define ecap_pms(e) (((e) >> 51) & 0x1) +#define ecap_rps(e) (((e) >> 49) & 0x1) #define ecap_smpwc(e) (((e) >> 48) & 0x1) #define ecap_flts(e) (((e) >> 47) & 0x1) #define ecap_slts(e) (((e) >> 46) & 0x1) @@ -210,6 +217,22 @@ #define ecap_max_handle_mask(e) (((e) >> 20) & 0xf) #define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */ +/* + * Decoding Perf Capability Register + */ +#define pcap_num_cntr(p) ((p) & 0xffff) +#define pcap_cntr_width(p) (((p) >> 16) & 0x7f) +#define pcap_num_event_group(p) (((p) >> 24) & 0x1f) +#define pcap_filters_mask(p) (((p) >> 32) & 0x1f) +#define pcap_interrupt(p) (((p) >> 50) & 0x1) +/* The counter stride is calculated as 2 ^ (x+10) bytes */ +#define pcap_cntr_stride(p) (1ULL << ((((p) >> 52) & 0x7) + 10)) + +/* + * Decoding Perf Event Capability Register + */ +#define pecap_es(p) ((p) & 0xfffffff) + /* Virtual command interface capability */ #define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */ @@ -561,6 +584,22 @@ struct dmar_domain { iommu core */ }; +struct iommu_pmu { + struct intel_iommu *iommu; + u32 num_cntr; /* Number of counters */ + u32 num_eg; /* Number of event group */ + u32 cntr_width; /* Counter width */ + u32 cntr_stride; /* Counter Stride */ + u32 filter; /* Bitmask of filter support */ + void __iomem *base; /* the PerfMon base address */ + void __iomem *cfg_reg; /* counter configuration base address */ + void __iomem *cntr_reg; /* counter 0 address*/ + void __iomem *overflow; /* overflow status register */ + + u64 *evcap; /* Indicates all supported events */ + u32 **cntr_evcap; /* Supported events of each counter. */ +}; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 reg_phys; /* physical address of hw register set */ @@ -608,6 +647,8 @@ struct intel_iommu { struct dmar_drhd_unit *drhd; void *perf_statistic; + + struct iommu_pmu *pmu; }; /* PCI domain-device relationship */ diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 3f03039e5cce..32432d82d774 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -435,6 +435,9 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, if (!info || !info->ats_enabled) return; + if (pci_dev_is_disconnected(to_pci_dev(dev))) + return; + sid = info->bus << 8 | info->devfn; qdep = info->ats_qdep; pfsid = info->pfsid; diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c new file mode 100644 index 000000000000..db5791a54455 --- /dev/null +++ b/drivers/iommu/intel/perfmon.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Support Intel IOMMU PerfMon + * Copyright(c) 2023 Intel Corporation. + */ +#define pr_fmt(fmt) "DMAR: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include +#include "iommu.h" +#include "perfmon.h" + +static inline void __iomem * +get_perf_reg_address(struct intel_iommu *iommu, u32 offset) +{ + u32 off = dmar_readl(iommu->reg + offset); + + return iommu->reg + off; +} + +int alloc_iommu_pmu(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu; + int i, j, ret; + u64 perfcap; + u32 cap; + + if (!ecap_pms(iommu->ecap)) + return 0; + + /* The IOMMU PMU requires the ECMD support as well */ + if (!cap_ecmds(iommu->cap)) + return -ENODEV; + + perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG); + /* The performance monitoring is not supported. */ + if (!perfcap) + return -ENODEV; + + /* Sanity check for the number of the counters and event groups */ + if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap)) + return -ENODEV; + + /* The interrupt on overflow is required */ + if (!pcap_interrupt(perfcap)) + return -ENODEV; + + iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL); + if (!iommu_pmu) + return -ENOMEM; + + iommu_pmu->num_cntr = pcap_num_cntr(perfcap); + iommu_pmu->cntr_width = pcap_cntr_width(perfcap); + iommu_pmu->filter = pcap_filters_mask(perfcap); + iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap); + iommu_pmu->num_eg = pcap_num_event_group(perfcap); + + iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL); + if (!iommu_pmu->evcap) { + ret = -ENOMEM; + goto free_pmu; + } + + /* Parse event group capabilities */ + for (i = 0; i < iommu_pmu->num_eg; i++) { + u64 pcap; + + pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG + + i * IOMMU_PMU_CAP_REGS_STEP); + iommu_pmu->evcap[i] = pecap_es(pcap); + } + + iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL); + if (!iommu_pmu->cntr_evcap) { + ret = -ENOMEM; + goto free_pmu_evcap; + } + for (i = 0; i < iommu_pmu->num_cntr; i++) { + iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL); + if (!iommu_pmu->cntr_evcap[i]) { + ret = -ENOMEM; + goto free_pmu_cntr_evcap; + } + /* + * Set to the global capabilities, will adjust according + * to per-counter capabilities later. + */ + for (j = 0; j < iommu_pmu->num_eg; j++) + iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j]; + } + + iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG); + iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG); + iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG); + + /* + * Check per-counter capabilities. All counters should have the + * same capabilities on Interrupt on Overflow Support and Counter + * Width. + */ + for (i = 0; i < iommu_pmu->num_cntr; i++) { + cap = dmar_readl(iommu_pmu->cfg_reg + + i * IOMMU_PMU_CFG_OFFSET + + IOMMU_PMU_CFG_CNTRCAP_OFFSET); + if (!iommu_cntrcap_pcc(cap)) + continue; + + /* + * It's possible that some counters have a different + * capability because of e.g., HW bug. Check the corner + * case here and simply drop those counters. + */ + if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) || + !iommu_cntrcap_ios(cap)) { + iommu_pmu->num_cntr = i; + pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n", + iommu_pmu->num_cntr); + } + + /* Clear the pre-defined events group */ + for (j = 0; j < iommu_pmu->num_eg; j++) + iommu_pmu->cntr_evcap[i][j] = 0; + + /* Override with per-counter event capabilities */ + for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) { + cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET + + IOMMU_PMU_CFG_CNTREVCAP_OFFSET + + (j * IOMMU_PMU_OFF_REGS_STEP)); + iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap); + /* + * Some events may only be supported by a specific counter. + * Track them in the evcap as well. + */ + iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap); + } + } + + iommu_pmu->iommu = iommu; + iommu->pmu = iommu_pmu; + + return 0; + +free_pmu_cntr_evcap: + for (i = 0; i < iommu_pmu->num_cntr; i++) + kfree(iommu_pmu->cntr_evcap[i]); + kfree(iommu_pmu->cntr_evcap); +free_pmu_evcap: + kfree(iommu_pmu->evcap); +free_pmu: + kfree(iommu_pmu); + + return ret; +} + +void free_iommu_pmu(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu = iommu->pmu; + + if (!iommu_pmu) + return; + + if (iommu_pmu->evcap) { + int i; + + for (i = 0; i < iommu_pmu->num_cntr; i++) + kfree(iommu_pmu->cntr_evcap[i]); + kfree(iommu_pmu->cntr_evcap); + } + kfree(iommu_pmu->evcap); + kfree(iommu_pmu); + iommu->pmu = NULL; +} diff --git a/drivers/iommu/intel/perfmon.h b/drivers/iommu/intel/perfmon.h new file mode 100644 index 000000000000..4b0d9c1fea6f --- /dev/null +++ b/drivers/iommu/intel/perfmon.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * PERFCFGOFF_REG, PERFFRZOFF_REG + * PERFOVFOFF_REG, PERFCNTROFF_REG + */ +#define IOMMU_PMU_NUM_OFF_REGS 4 +#define IOMMU_PMU_OFF_REGS_STEP 4 + +#define IOMMU_PMU_CFG_OFFSET 0x100 +#define IOMMU_PMU_CFG_CNTRCAP_OFFSET 0x80 +#define IOMMU_PMU_CFG_CNTREVCAP_OFFSET 0x84 +#define IOMMU_PMU_CFG_SIZE 0x8 +#define IOMMU_PMU_CFG_FILTERS_OFFSET 0x4 + +#define IOMMU_PMU_CAP_REGS_STEP 8 + +#define iommu_cntrcap_pcc(p) ((p) & 0x1) +#define iommu_cntrcap_cw(p) (((p) >> 8) & 0xff) +#define iommu_cntrcap_ios(p) (((p) >> 16) & 0x1) +#define iommu_cntrcap_egcnt(p) (((p) >> 28) & 0xf) + +#define iommu_event_select(p) ((p) & 0xfffffff) +#define iommu_event_group(p) (((p) >> 28) & 0xf) + +#ifdef CONFIG_INTEL_IOMMU_PERF_EVENTS +int alloc_iommu_pmu(struct intel_iommu *iommu); +void free_iommu_pmu(struct intel_iommu *iommu); +#else +static inline int +alloc_iommu_pmu(struct intel_iommu *iommu) +{ + return 0; +} + +static inline void +free_iommu_pmu(struct intel_iommu *iommu) +{ +} +#endif /* CONFIG_INTEL_IOMMU_PERF_EVENTS */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 65b50b6dcd10..293a3aafadfd 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1741,6 +1741,9 @@ static void probe_alloc_default_domain(struct bus_type *bus, { struct __group_domain_type gtype; + if (group->default_domain) + return; + memset(>ype, 0, sizeof(gtype)); /* Ask for default domain requirements of all devices in the group */ diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 83314b9d8f38..ee59647c2050 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -99,7 +99,8 @@ int __init irq_remapping_prepare(void) if (disable_irq_remap) return -ENOSYS; - if (intel_irq_remap_ops.prepare() == 0) + if (IS_ENABLED(CONFIG_INTEL_IOMMU) && + intel_irq_remap_ops.prepare() == 0) remap_ops = &intel_irq_remap_ops; else if (IS_ENABLED(CONFIG_AMD_IOMMU) && amd_iommu_irq_ops.prepare() == 0) diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 8261066de07d..e4358393fe37 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -152,13 +152,6 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type) return &dom->domain; } -static void sprd_iommu_domain_free(struct iommu_domain *domain) -{ - struct sprd_iommu_domain *dom = to_sprd_domain(domain); - - kfree(dom); -} - static void sprd_iommu_first_vpn(struct sprd_iommu_domain *dom) { struct sprd_iommu_device *sdev = dom->sdev; @@ -231,6 +224,28 @@ static void sprd_iommu_hw_en(struct sprd_iommu_device *sdev, bool en) sprd_iommu_update_bits(sdev, reg_cfg, mask, 0, val); } +static void sprd_iommu_cleanup(struct sprd_iommu_domain *dom) +{ + size_t pgt_size; + + /* Nothing need to do if the domain hasn't been attached */ + if (!dom->sdev) + return; + + pgt_size = sprd_iommu_pgt_size(&dom->domain); + dma_free_coherent(dom->sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa); + dom->sdev = NULL; + sprd_iommu_hw_en(dom->sdev, false); +} + +static void sprd_iommu_domain_free(struct iommu_domain *domain) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + + sprd_iommu_cleanup(dom); + kfree(dom); +} + static int sprd_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c index 091b0fe7e324..5d4421f75b43 100644 --- a/drivers/irqchip/irq-brcmstb-l2.c +++ b/drivers/irqchip/irq-brcmstb-l2.c @@ -2,7 +2,7 @@ /* * Generic Broadcom Set Top Box Level 2 Interrupt controller driver * - * Copyright (C) 2014-2017 Broadcom + * Copyright (C) 2014-2024 Broadcom */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -113,6 +113,9 @@ static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc) generic_handle_domain_irq(b->domain, irq); } while (status); out: + /* Don't ack parent before all device writes are done */ + wmb(); + chained_irq_exit(chip, desc); } diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d5355ff1221d..e2f2c8b594e6 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3201,6 +3201,7 @@ static void its_cpu_init_lpis(void) val |= GICR_CTLR_ENABLE_LPIS; writel_relaxed(val, rbase + GICR_CTLR); +out: if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); @@ -3236,7 +3237,6 @@ static void its_cpu_init_lpis(void) /* Make sure the GIC has seen the above */ dsb(sy); -out: gic_data_rdist()->flags |= RD_LOCAL_LPI_ENABLED; pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n", smp_processor_id(), @@ -3876,8 +3876,9 @@ static int its_vpe_set_affinity(struct irq_data *d, bool force) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); - int from, cpu = cpumask_first(mask_val); + struct cpumask common, *table_mask; unsigned long flags; + int from, cpu; /* * Changing affinity is mega expensive, so let's be as lazy as @@ -3893,19 +3894,22 @@ static int its_vpe_set_affinity(struct irq_data *d, * taken on any vLPI handling path that evaluates vpe->col_idx. */ from = vpe_to_cpuid_lock(vpe, &flags); + table_mask = gic_data_rdist_cpu(from)->vpe_table_mask; + + /* + * If we are offered another CPU in the same GICv4.1 ITS + * affinity, pick this one. Otherwise, any CPU will do. + */ + if (table_mask && cpumask_and(&common, mask_val, table_mask)) + cpu = cpumask_test_cpu(from, &common) ? from : cpumask_first(&common); + else + cpu = cpumask_first(mask_val); + if (from == cpu) goto out; vpe->col_idx = cpu; - /* - * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD - * is sharing its VPE table with the current one. - */ - if (gic_data_rdist_cpu(cpu)->vpe_table_mask && - cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask)) - goto out; - its_send_vmovp(vpe); its_vpe_db_proxy_move(vpe, from, cpu); diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 3d99b8bdd8ef..de115ee6e9ec 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -242,7 +242,7 @@ static int eiointc_domain_alloc(struct irq_domain *domain, unsigned int virq, int ret; unsigned int i, type; unsigned long hwirq = 0; - struct eiointc *priv = domain->host_data; + struct eiointc_priv *priv = domain->host_data; ret = irq_domain_translate_onecell(domain, arg, &hwirq, &type); if (ret) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index 10c3e85c90c2..be71459c7465 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -28,8 +28,7 @@ #define ISCR 0x10 #define IITSR 0x14 #define TSCR 0x20 -#define TITSR0 0x24 -#define TITSR1 0x28 +#define TITSR(n) (0x24 + (n) * 4) #define TITSR0_MAX_INT 16 #define TITSEL_WIDTH 0x2 #define TSSR(n) (0x30 + ((n) * 4)) @@ -67,28 +66,43 @@ static struct rzg2l_irqc_priv *irq_data_to_priv(struct irq_data *data) return data->domain->host_data; } -static void rzg2l_irq_eoi(struct irq_data *d) +static void rzg2l_clear_irq_int(struct rzg2l_irqc_priv *priv, unsigned int hwirq) { - unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_IRQ_START; - struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); + unsigned int hw_irq = hwirq - IRQC_IRQ_START; u32 bit = BIT(hw_irq); - u32 reg; + u32 iitsr, iscr; - reg = readl_relaxed(priv->base + ISCR); - if (reg & bit) - writel_relaxed(reg & ~bit, priv->base + ISCR); + iscr = readl_relaxed(priv->base + ISCR); + iitsr = readl_relaxed(priv->base + IITSR); + + /* + * ISCR can only be cleared if the type is falling-edge, rising-edge or + * falling/rising-edge. + */ + if ((iscr & bit) && (iitsr & IITSR_IITSEL_MASK(hw_irq))) { + writel_relaxed(iscr & ~bit, priv->base + ISCR); + /* + * Enforce that the posted write is flushed to prevent that the + * just handled interrupt is raised again. + */ + readl_relaxed(priv->base + ISCR); + } } -static void rzg2l_tint_eoi(struct irq_data *d) +static void rzg2l_clear_tint_int(struct rzg2l_irqc_priv *priv, unsigned int hwirq) { - unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_TINT_START; - struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); - u32 bit = BIT(hw_irq); + u32 bit = BIT(hwirq - IRQC_TINT_START); u32 reg; reg = readl_relaxed(priv->base + TSCR); - if (reg & bit) + if (reg & bit) { writel_relaxed(reg & ~bit, priv->base + TSCR); + /* + * Enforce that the posted write is flushed to prevent that the + * just handled interrupt is raised again. + */ + readl_relaxed(priv->base + TSCR); + } } static void rzg2l_irqc_eoi(struct irq_data *d) @@ -98,9 +112,9 @@ static void rzg2l_irqc_eoi(struct irq_data *d) raw_spin_lock(&priv->lock); if (hw_irq >= IRQC_IRQ_START && hw_irq <= IRQC_IRQ_COUNT) - rzg2l_irq_eoi(d); + rzg2l_clear_irq_int(priv, hw_irq); else if (hw_irq >= IRQC_TINT_START && hw_irq < IRQC_NUM_IRQ) - rzg2l_tint_eoi(d); + rzg2l_clear_tint_int(priv, hw_irq); raw_spin_unlock(&priv->lock); irq_chip_eoi_parent(d); } @@ -148,8 +162,10 @@ static void rzg2l_irqc_irq_enable(struct irq_data *d) static int rzg2l_irq_set_type(struct irq_data *d, unsigned int type) { - unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_IRQ_START; struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); + unsigned int hwirq = irqd_to_hwirq(d); + u32 iitseln = hwirq - IRQC_IRQ_START; + bool clear_irq_int = false; u16 sense, tmp; switch (type & IRQ_TYPE_SENSE_MASK) { @@ -159,14 +175,17 @@ static int rzg2l_irq_set_type(struct irq_data *d, unsigned int type) case IRQ_TYPE_EDGE_FALLING: sense = IITSR_IITSEL_EDGE_FALLING; + clear_irq_int = true; break; case IRQ_TYPE_EDGE_RISING: sense = IITSR_IITSEL_EDGE_RISING; + clear_irq_int = true; break; case IRQ_TYPE_EDGE_BOTH: sense = IITSR_IITSEL_EDGE_BOTH; + clear_irq_int = true; break; default: @@ -175,22 +194,40 @@ static int rzg2l_irq_set_type(struct irq_data *d, unsigned int type) raw_spin_lock(&priv->lock); tmp = readl_relaxed(priv->base + IITSR); - tmp &= ~IITSR_IITSEL_MASK(hw_irq); - tmp |= IITSR_IITSEL(hw_irq, sense); + tmp &= ~IITSR_IITSEL_MASK(iitseln); + tmp |= IITSR_IITSEL(iitseln, sense); + if (clear_irq_int) + rzg2l_clear_irq_int(priv, hwirq); writel_relaxed(tmp, priv->base + IITSR); raw_spin_unlock(&priv->lock); return 0; } +static u32 rzg2l_disable_tint_and_set_tint_source(struct irq_data *d, struct rzg2l_irqc_priv *priv, + u32 reg, u32 tssr_offset, u8 tssr_index) +{ + u32 tint = (u32)(uintptr_t)irq_data_get_irq_chip_data(d); + u32 tien = reg & (TIEN << TSSEL_SHIFT(tssr_offset)); + + /* Clear the relevant byte in reg */ + reg &= ~(TSSEL_MASK << TSSEL_SHIFT(tssr_offset)); + /* Set TINT and leave TIEN clear */ + reg |= tint << TSSEL_SHIFT(tssr_offset); + writel_relaxed(reg, priv->base + TSSR(tssr_index)); + + return reg | tien; +} + static int rzg2l_tint_set_edge(struct irq_data *d, unsigned int type) { struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); unsigned int hwirq = irqd_to_hwirq(d); u32 titseln = hwirq - IRQC_TINT_START; - u32 offset; - u8 sense; - u32 reg; + u32 tssr_offset = TSSR_OFFSET(titseln); + u8 tssr_index = TSSR_INDEX(titseln); + u8 index, sense; + u32 reg, tssr; switch (type & IRQ_TYPE_SENSE_MASK) { case IRQ_TYPE_EDGE_RISING: @@ -205,17 +242,21 @@ static int rzg2l_tint_set_edge(struct irq_data *d, unsigned int type) return -EINVAL; } - offset = TITSR0; + index = 0; if (titseln >= TITSR0_MAX_INT) { titseln -= TITSR0_MAX_INT; - offset = TITSR1; + index = 1; } raw_spin_lock(&priv->lock); - reg = readl_relaxed(priv->base + offset); + tssr = readl_relaxed(priv->base + TSSR(tssr_index)); + tssr = rzg2l_disable_tint_and_set_tint_source(d, priv, tssr, tssr_offset, tssr_index); + reg = readl_relaxed(priv->base + TITSR(index)); reg &= ~(IRQ_MASK << (titseln * TITSEL_WIDTH)); reg |= sense << (titseln * TITSEL_WIDTH); - writel_relaxed(reg, priv->base + offset); + writel_relaxed(reg, priv->base + TITSR(index)); + rzg2l_clear_tint_int(priv, hwirq); + writel_relaxed(tssr, priv->base + TSSR(tssr_index)); raw_spin_unlock(&priv->lock); return 0; diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 2f4784860df5..be5e19a86ac3 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -144,7 +144,13 @@ static void plic_irq_eoi(struct irq_data *d) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); - writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + if (unlikely(irqd_irq_disabled(d))) { + plic_toggle(handler, d->hwirq, 1); + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + plic_toggle(handler, d->hwirq, 0); + } else { + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + } } #ifdef CONFIG_SMP diff --git a/drivers/leds/flash/leds-sgm3140.c b/drivers/leds/flash/leds-sgm3140.c index d3a30ad94ac4..dd5d327c52a1 100644 --- a/drivers/leds/flash/leds-sgm3140.c +++ b/drivers/leds/flash/leds-sgm3140.c @@ -114,8 +114,11 @@ static int sgm3140_brightness_set(struct led_classdev *led_cdev, "failed to enable regulator: %d\n", ret); return ret; } + gpiod_set_value_cansleep(priv->flash_gpio, 0); gpiod_set_value_cansleep(priv->enable_gpio, 1); } else { + del_timer_sync(&priv->powerdown_timer); + gpiod_set_value_cansleep(priv->flash_gpio, 0); gpiod_set_value_cansleep(priv->enable_gpio, 0); ret = regulator_disable(priv->vin_regulator); if (ret) { diff --git a/drivers/leds/leds-aw2013.c b/drivers/leds/leds-aw2013.c index 0b52fc9097c6..3c05958578a1 100644 --- a/drivers/leds/leds-aw2013.c +++ b/drivers/leds/leds-aw2013.c @@ -397,6 +397,7 @@ error_reg: regulator_disable(chip->vcc_regulator); error: + mutex_unlock(&chip->mutex); mutex_destroy(&chip->mutex); return ret; } diff --git a/drivers/leds/trigger/ledtrig-panic.c b/drivers/leds/trigger/ledtrig-panic.c index 64abf2e91608..5a6b21bfeb9a 100644 --- a/drivers/leds/trigger/ledtrig-panic.c +++ b/drivers/leds/trigger/ledtrig-panic.c @@ -64,10 +64,13 @@ static long led_panic_blink(int state) static int __init ledtrig_panic_init(void) { + led_trigger_register_simple("panic", &trigger); + if (!trigger) + return -ENOMEM; + atomic_notifier_chain_register(&panic_notifier_list, &led_trigger_panic_nb); - led_trigger_register_simple("panic", &trigger); panic_blink = led_panic_blink; return 0; } diff --git a/drivers/mailbox/arm_mhuv2.c b/drivers/mailbox/arm_mhuv2.c index c6d4957c4da8..0ec21dcdbde7 100644 --- a/drivers/mailbox/arm_mhuv2.c +++ b/drivers/mailbox/arm_mhuv2.c @@ -553,7 +553,8 @@ static irqreturn_t mhuv2_sender_interrupt(int irq, void *data) priv = chan->con_priv; if (!IS_PROTOCOL_DOORBELL(priv)) { - writel_relaxed(1, &mhu->send->ch_wn[priv->ch_wn_idx + priv->windows - 1].int_clr); + for (i = 0; i < priv->windows; i++) + writel_relaxed(1, &mhu->send->ch_wn[priv->ch_wn_idx + i].int_clr); if (chan->cl) { mbox_chan_txdone(chan, 0); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 100a6a236d92..ec662f97ba82 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -614,7 +614,7 @@ static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector, io_req.mem.ptr.vma = (char *)b->data + offset; } - r = dm_io(&io_req, 1, ®ion, NULL); + r = dm_io(&io_req, 1, ®ion, NULL, IOPRIO_DEFAULT); if (unlikely(r)) b->end_io(b, errno_to_blk_status(r)); } @@ -1375,7 +1375,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c) BUG_ON(dm_bufio_in_request()); - return dm_io(&io_req, 1, &io_reg, NULL); + return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT); } EXPORT_SYMBOL_GPL(dm_bufio_issue_flush); @@ -1398,7 +1398,7 @@ int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t c BUG_ON(dm_bufio_in_request()); - return dm_io(&io_req, 1, &io_reg, NULL); + return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT); } EXPORT_SYMBOL_GPL(dm_bufio_issue_discard); diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h index 6ba3e9c91af5..8bc21d54884e 100644 --- a/drivers/md/dm-cache-policy.h +++ b/drivers/md/dm-cache-policy.h @@ -75,7 +75,7 @@ struct dm_cache_policy { * background work. */ int (*get_background_work)(struct dm_cache_policy *p, bool idle, - struct policy_work **result); + struct policy_work **result); /* * You must pass in the same work pointer that you were given, not diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 71dcd8fd4050..6314210d3697 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -21,6 +21,8 @@ #include "dm-ima.h" #define DM_RESERVED_MAX_IOS 1024 +#define DM_MAX_TARGETS 1048576 +#define DM_MAX_TARGET_PARAMS 1024 struct dm_io; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index ff515437d81e..25e51dc6e559 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -52,15 +52,17 @@ struct convert_context { struct completion restart; struct bio *bio_in; - struct bio *bio_out; struct bvec_iter iter_in; + struct bio *bio_out; struct bvec_iter iter_out; - u64 cc_sector; atomic_t cc_pending; + u64 cc_sector; union { struct skcipher_request *req; struct aead_request *req_aead; } r; + bool aead_recheck; + bool aead_failed; }; @@ -72,10 +74,8 @@ struct dm_crypt_io { struct bio *base_bio; u8 *integrity_metadata; bool integrity_metadata_from_pool:1; - bool in_tasklet:1; struct work_struct work; - struct tasklet_struct tasklet; struct convert_context ctx; @@ -83,6 +83,8 @@ struct dm_crypt_io { blk_status_t error; sector_t sector; + struct bvec_iter saved_bi_iter; + struct rb_node rb_node; } CRYPTO_MINALIGN_ATTR; @@ -1367,10 +1369,13 @@ static int crypt_convert_block_aead(struct crypt_config *cc, if (r == -EBADMSG) { sector_t s = le64_to_cpu(*sector); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } } if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) @@ -1726,10 +1731,11 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->base_bio = bio; io->sector = sector; io->error = 0; + io->ctx.aead_recheck = false; + io->ctx.aead_failed = false; io->ctx.r.req = NULL; io->integrity_metadata = NULL; io->integrity_metadata_from_pool = false; - io->in_tasklet = false; atomic_set(&io->io_pending, 0); } @@ -1738,11 +1744,7 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } -static void kcryptd_io_bio_endio(struct work_struct *work) -{ - struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); - bio_endio(io->base_bio); -} +static void kcryptd_queue_read(struct dm_crypt_io *io); /* * One of the bios was finished. Check for completion of @@ -1757,6 +1759,15 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (!atomic_dec_and_test(&io->io_pending)) return; + if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) && + cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) { + io->ctx.aead_recheck = true; + io->ctx.aead_failed = false; + io->error = 0; + kcryptd_queue_read(io); + return; + } + if (io->ctx.r.req) crypt_free_req(cc, io->ctx.r.req, base_bio); @@ -1767,20 +1778,6 @@ static void crypt_dec_pending(struct dm_crypt_io *io) base_bio->bi_status = error; - /* - * If we are running this function from our tasklet, - * we can't call bio_endio() here, because it will call - * clone_endio() from dm.c, which in turn will - * free the current struct dm_crypt_io structure with - * our tasklet. In this case we need to delay bio_endio() - * execution to after the tasklet is done and dequeued. - */ - if (io->in_tasklet) { - INIT_WORK(&io->work, kcryptd_io_bio_endio); - queue_work(cc->io_queue, &io->work); - return; - } - bio_endio(base_bio); } @@ -1806,15 +1803,19 @@ static void crypt_endio(struct bio *clone) struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->cc; unsigned int rw = bio_data_dir(clone); - blk_status_t error; + blk_status_t error = clone->bi_status; + + if (io->ctx.aead_recheck && !error) { + kcryptd_queue_crypt(io); + return; + } /* * free the processed pages */ - if (rw == WRITE) + if (rw == WRITE || io->ctx.aead_recheck) crypt_free_buffer_pages(cc, clone); - error = clone->bi_status; bio_put(clone); if (rw == READ && !error) { @@ -1835,6 +1836,22 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) struct crypt_config *cc = io->cc; struct bio *clone; + if (io->ctx.aead_recheck) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return 1; + crypt_inc_pending(io); + clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); + if (unlikely(!clone)) { + crypt_dec_pending(io); + return 1; + } + clone->bi_iter.bi_sector = cc->start + io->sector; + crypt_convert_init(cc, &io->ctx, clone, clone, io->sector); + io->saved_bi_iter = clone->bi_iter; + dm_submit_bio_remap(io->base_bio, clone); + return 0; + } + /* * We need the original biovec array in order to decrypt the whole bio * data *afterwards* -- thanks to immutable biovecs we don't need to @@ -2061,6 +2078,12 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) io->ctx.bio_out = clone; io->ctx.iter_out = clone->bi_iter; + if (crypt_integrity_aead(cc)) { + bio_copy_data(clone, io->base_bio); + io->ctx.bio_in = clone; + io->ctx.iter_in = clone->bi_iter; + } + sector += bio_sectors(clone); crypt_inc_pending(io); @@ -2097,6 +2120,14 @@ dec: static void kcryptd_crypt_read_done(struct dm_crypt_io *io) { + if (io->ctx.aead_recheck) { + if (!io->error) { + io->ctx.bio_in->bi_iter = io->saved_bi_iter; + bio_copy_data(io->base_bio, io->ctx.bio_in); + } + crypt_free_buffer_pages(io->cc, io->ctx.bio_in); + bio_put(io->ctx.bio_in); + } crypt_dec_pending(io); } @@ -2126,11 +2157,17 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) crypt_inc_pending(io); - crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, - io->sector); + if (io->ctx.aead_recheck) { + io->ctx.cc_sector = io->sector + cc->iv_offset; + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } else { + crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, + io->sector); - r = crypt_convert(cc, &io->ctx, - test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } /* * Crypto API backlogged the request, because its queue was full * and we're in softirq context, so continue from a workqueue @@ -2173,10 +2210,13 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, if (error == -EBADMSG) { sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } io->error = BLK_STS_PROTECTION; } else if (error < 0) io->error = BLK_STS_IOERR; @@ -2213,11 +2253,6 @@ static void kcryptd_crypt(struct work_struct *work) kcryptd_crypt_write_convert(io); } -static void kcryptd_crypt_tasklet(unsigned long work) -{ - kcryptd_crypt((struct work_struct *)work); -} - static void kcryptd_queue_crypt(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -2229,15 +2264,10 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) * irqs_disabled(): the kernel may run some IO completion from the idle thread, but * it is being executed with irqs disabled. */ - if (in_hardirq() || irqs_disabled()) { - io->in_tasklet = true; - tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); - tasklet_schedule(&io->tasklet); + if (!(in_hardirq() || irqs_disabled())) { + kcryptd_crypt(&io->work); return; } - - kcryptd_crypt(&io->work); - return; } INIT_WORK(&io->work, kcryptd_crypt); @@ -2505,7 +2535,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string type = &key_type_encrypted; set_key = set_key_encrypted; } else if (IS_ENABLED(CONFIG_TRUSTED_KEYS) && - !strncmp(key_string, "trusted:", key_desc - key_string + 1)) { + !strncmp(key_string, "trusted:", key_desc - key_string + 1)) { type = &key_type_trusted; set_key = set_key_trusted; } else { @@ -3112,7 +3142,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar sval = strchr(opt_string + strlen("integrity:"), ':') + 1; if (!strcasecmp(sval, "aead")) { set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags); - } else if (strcasecmp(sval, "none")) { + } else if (strcasecmp(sval, "none")) { ti->error = "Unknown integrity profile"; return -EINVAL; } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 77fcff82c82a..9c9e2b50c63c 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -279,6 +279,8 @@ struct dm_integrity_c { atomic64_t number_of_mismatches; + mempool_t recheck_pool; + struct notifier_block reboot_notifier; }; @@ -577,7 +579,7 @@ static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf) } } - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) return r; @@ -1087,7 +1089,7 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf, io_loc.sector = ic->start + SB_SECTORS + sector; io_loc.count = n_sectors; - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) { dm_integrity_io_error(ic, (opf & REQ_OP_MASK) == REQ_OP_READ ? "reading journal" : "writing journal", r); @@ -1203,7 +1205,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned int section, u io_loc.sector = target; io_loc.count = n_sectors; - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) { WARN_ONCE(1, "asynchronous dm_io failed: %d", r); fn(-1UL, data); @@ -1530,7 +1532,7 @@ static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_dat fr.io_reg.count = 0, fr.ic = ic; init_completion(&fr.comp); - r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL); + r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL, IOPRIO_DEFAULT); BUG_ON(r); } @@ -1699,6 +1701,85 @@ failed: get_random_bytes(result, ic->tag_size); } +static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checksum) +{ + struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); + struct dm_integrity_c *ic = dio->ic; + struct bvec_iter iter; + struct bio_vec bv; + sector_t sector, logical_sector, area, offset; + struct page *page; + + get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); + dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, + &dio->metadata_offset); + sector = get_data_sector(ic, area, offset); + logical_sector = dio->range.logical_sector; + + page = mempool_alloc(&ic->recheck_pool, GFP_NOIO); + + __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { + unsigned pos = 0; + + do { + sector_t alignment; + char *mem; + char *buffer = page_to_virt(page); + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = ic->io; + io_loc.bdev = ic->dev->bdev; + io_loc.sector = sector; + io_loc.count = ic->sectors_per_block; + + /* Align the bio to logical block size */ + alignment = dio->range.logical_sector | bio_sectors(bio) | (PAGE_SIZE >> SECTOR_SHIFT); + alignment &= -alignment; + io_loc.sector = round_down(io_loc.sector, alignment); + io_loc.count += sector - io_loc.sector; + buffer += (sector - io_loc.sector) << SECTOR_SHIFT; + io_loc.count = round_up(io_loc.count, alignment); + + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); + if (unlikely(r)) { + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + integrity_sector_checksum(ic, logical_sector, buffer, checksum); + r = dm_integrity_rw_tag(ic, checksum, &dio->metadata_block, + &dio->metadata_offset, ic->tag_size, TAG_CMP); + if (r) { + if (r > 0) { + DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", + bio->bi_bdev, logical_sector); + atomic64_inc(&ic->number_of_mismatches); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", + bio, logical_sector, 0); + r = -EILSEQ; + } + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + mem = bvec_kmap_local(&bv); + memcpy(mem + pos, buffer, ic->sectors_per_block << SECTOR_SHIFT); + kunmap_local(mem); + + pos += ic->sectors_per_block << SECTOR_SHIFT; + sector += ic->sectors_per_block; + logical_sector += ic->sectors_per_block; + } while (pos < bv.bv_len); + } +free_ret: + mempool_free(page, &ic->recheck_pool); +} + static void integrity_metadata(struct work_struct *w) { struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); @@ -1783,19 +1864,12 @@ again: r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { - if (r > 0) { - sector_t s; - - s = sector - ((r + ic->tag_size - 1) / ic->tag_size); - DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", - bio->bi_bdev, s); - r = -EILSEQ; - atomic64_inc(&ic->number_of_mismatches); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", - bio, s, 0); - } if (likely(checksums != checksums_onstack)) kfree(checksums); + if (r > 0) { + integrity_recheck(dio, checksums_onstack); + goto skip_io; + } goto error; } @@ -2301,7 +2375,6 @@ offload_to_thread: else skip_check: dec_in_flight(dio); - } else { INIT_WORK(&dio->work, integrity_metadata); queue_work(ic->metadata_wq, &dio->work); @@ -2709,7 +2782,7 @@ next_chunk: io_loc.sector = get_data_sector(ic, area, offset); io_loc.count = n_sectors; - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) { dm_integrity_io_error(ic, "reading data", r); goto err; @@ -4085,7 +4158,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv } else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { if (val < 1 << SECTOR_SHIFT || val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT || - (val & (val -1))) { + (val & (val - 1))) { r = -EINVAL; ti->error = "Invalid block_size argument"; goto bad; @@ -4208,6 +4281,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv goto bad; } + r = mempool_init_page_pool(&ic->recheck_pool, 1, 0); + if (r) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); if (!ic->metadata_wq) { @@ -4405,7 +4484,7 @@ try_smaller_buffer: if (ic->internal_hash) { size_t recalc_tags_size; ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); - if (!ic->recalc_wq ) { + if (!ic->recalc_wq) { ti->error = "Cannot allocate workqueue"; r = -ENOMEM; goto bad; @@ -4572,6 +4651,7 @@ static void dm_integrity_dtr(struct dm_target *ti) kvfree(ic->bbs); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); + mempool_exit(&ic->recheck_pool); mempool_exit(&ic->journal_io_mempool); if (ic->io) dm_io_client_destroy(ic->io); diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index e488b05e35fa..ec97658387c3 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -295,7 +295,7 @@ static void km_dp_init(struct dpages *dp, void *data) *---------------------------------------------------------------*/ static void do_region(const blk_opf_t opf, unsigned int region, struct dm_io_region *where, struct dpages *dp, - struct io *io) + struct io *io, unsigned short ioprio) { struct bio *bio; struct page *page; @@ -344,6 +344,7 @@ static void do_region(const blk_opf_t opf, unsigned int region, &io->client->bios); bio->bi_iter.bi_sector = where->sector + (where->count - remaining); bio->bi_end_io = endio; + bio->bi_ioprio = ioprio; store_io_and_region_in_bio(bio, io, region); if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) { @@ -371,7 +372,7 @@ static void do_region(const blk_opf_t opf, unsigned int region, static void dispatch_io(blk_opf_t opf, unsigned int num_regions, struct dm_io_region *where, struct dpages *dp, - struct io *io, int sync) + struct io *io, int sync, unsigned short ioprio) { int i; struct dpages old_pages = *dp; @@ -388,7 +389,7 @@ static void dispatch_io(blk_opf_t opf, unsigned int num_regions, for (i = 0; i < num_regions; i++) { *dp = old_pages; if (where[i].count || (opf & REQ_PREFLUSH)) - do_region(opf, i, where + i, dp, io); + do_region(opf, i, where + i, dp, io, ioprio); } /* @@ -413,7 +414,7 @@ static void sync_io_complete(unsigned long error, void *context) static int sync_io(struct dm_io_client *client, unsigned int num_regions, struct dm_io_region *where, blk_opf_t opf, struct dpages *dp, - unsigned long *error_bits) + unsigned long *error_bits, unsigned short ioprio) { struct io *io; struct sync_io sio; @@ -435,7 +436,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(opf, num_regions, where, dp, io, 1); + dispatch_io(opf, num_regions, where, dp, io, 1, ioprio); wait_for_completion_io(&sio.wait); @@ -447,7 +448,8 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, static int async_io(struct dm_io_client *client, unsigned int num_regions, struct dm_io_region *where, blk_opf_t opf, - struct dpages *dp, io_notify_fn fn, void *context) + struct dpages *dp, io_notify_fn fn, void *context, + unsigned short ioprio) { struct io *io; @@ -467,7 +469,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(opf, num_regions, where, dp, io, 0); + dispatch_io(opf, num_regions, where, dp, io, 0, ioprio); return 0; } @@ -509,7 +511,8 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, } int dm_io(struct dm_io_request *io_req, unsigned int num_regions, - struct dm_io_region *where, unsigned long *sync_error_bits) + struct dm_io_region *where, unsigned long *sync_error_bits, + unsigned short ioprio) { int r; struct dpages dp; @@ -520,11 +523,11 @@ int dm_io(struct dm_io_request *io_req, unsigned int num_regions, if (!io_req->notify.fn) return sync_io(io_req->client, num_regions, where, - io_req->bi_opf, &dp, sync_error_bits); + io_req->bi_opf, &dp, sync_error_bits, ioprio); return async_io(io_req->client, num_regions, where, io_req->bi_opf, &dp, io_req->notify.fn, - io_req->notify.context); + io_req->notify.context, ioprio); } EXPORT_SYMBOL(dm_io); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 206e6ce554dc..4376754816ab 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1877,7 +1877,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern minimum_data_size - sizeof(param_kernel->version))) return -EFAULT; - if (param_kernel->data_size < minimum_data_size) { + if (unlikely(param_kernel->data_size < minimum_data_size) || + unlikely(param_kernel->data_size > DM_MAX_TARGETS * DM_MAX_TARGET_PARAMS)) { DMERR("Invalid data size in the ioctl structure: %u", param_kernel->data_size); return -EINVAL; diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 0ef78e56aa88..fda51bd140ed 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -572,9 +572,9 @@ static int run_io_job(struct kcopyd_job *job) io_job_start(job->kc->throttle); if (job->op == REQ_OP_READ) - r = dm_io(&io_req, 1, &job->source, NULL); + r = dm_io(&io_req, 1, &job->source, NULL, IOPRIO_DEFAULT); else - r = dm_io(&io_req, job->num_dests, job->dests, NULL); + r = dm_io(&io_req, job->num_dests, job->dests, NULL, IOPRIO_DEFAULT); return r; } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 05141eea18d3..da77878cb2c0 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -295,7 +295,7 @@ static int rw_header(struct log_c *lc, enum req_op op) { lc->io_req.bi_opf = op; - return dm_io(&lc->io_req, 1, &lc->header_location, NULL); + return dm_io(&lc->io_req, 1, &lc->header_location, NULL, IOPRIO_DEFAULT); } static int flush_header(struct log_c *lc) @@ -308,7 +308,7 @@ static int flush_header(struct log_c *lc) lc->io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; - return dm_io(&lc->io_req, 1, &null_location, NULL); + return dm_io(&lc->io_req, 1, &null_location, NULL, IOPRIO_DEFAULT); } static int read_header(struct log_c *log) @@ -756,8 +756,8 @@ static void core_set_region_sync(struct dm_dirty_log *log, region_t region, log_clear_bit(lc, lc->recovering_bits, region); if (in_sync) { log_set_bit(lc, lc->sync_bits, region); - lc->sync_count++; - } else if (log_test_bit(lc->sync_bits, region)) { + lc->sync_count++; + } else if (log_test_bit(lc->sync_bits, region)) { lc->sync_count--; log_clear_bit(lc, lc->sync_bits, region); } @@ -765,9 +765,9 @@ static void core_set_region_sync(struct dm_dirty_log *log, region_t region, static region_t core_get_sync_count(struct dm_dirty_log *log) { - struct log_c *lc = (struct log_c *) log->context; + struct log_c *lc = (struct log_c *) log->context; - return lc->sync_count; + return lc->sync_count; } #define DMEMIT_SYNC \ diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 4b7528dc2fd0..99b4738e867a 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -362,8 +362,8 @@ static struct { const int mode; const char *param; } _raid456_journal_mode[] = { - { R5C_JOURNAL_MODE_WRITE_THROUGH , "writethrough" }, - { R5C_JOURNAL_MODE_WRITE_BACK , "writeback" } + { R5C_JOURNAL_MODE_WRITE_THROUGH, "writethrough" }, + { R5C_JOURNAL_MODE_WRITE_BACK, "writeback" } }; /* Return MD raid4/5/6 journal mode for dm @journal_mode one */ @@ -1114,7 +1114,7 @@ too_many: * [stripe_cache ] Stripe cache size for higher RAIDs * [region_size ] Defines granularity of bitmap * [journal_dev ] raid4/5/6 journaling deviice - * (i.e. write hole closing log) + * (i.e. write hole closing log) * * RAID10-only options: * [raid10_copies <# copies>] Number of copies. (Default: 2) @@ -3325,14 +3325,14 @@ static int raid_map(struct dm_target *ti, struct bio *bio) struct mddev *mddev = &rs->md; /* - * If we're reshaping to add disk(s)), ti->len and + * If we're reshaping to add disk(s), ti->len and * mddev->array_sectors will differ during the process * (ti->len > mddev->array_sectors), so we have to requeue * bios with addresses > mddev->array_sectors here or * there will occur accesses past EOD of the component * data images thus erroring the raid set. */ - if (unlikely(bio_end_sector(bio) > mddev->array_sectors)) + if (unlikely(bio_has_data(bio) && bio_end_sector(bio) > mddev->array_sectors)) return DM_MAPIO_REQUEUE; md_handle_request(mddev, bio); @@ -3999,7 +3999,7 @@ static int raid_preresume(struct dm_target *ti) } /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */ - if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && + if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) || (rs->requested_bitmap_chunk_sectors && mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) { @@ -4046,7 +4046,9 @@ static void raid_resume(struct dm_target *ti) * Take this opportunity to check whether any failed * devices are reachable again. */ + mddev_lock_nointr(mddev); attempt_restore_of_faulty_devices(rs); + mddev_unlock(mddev); } if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index c38e63706d91..1004199ae77a 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -273,7 +273,7 @@ static int mirror_flush(struct dm_target *ti) } error_bits = -1; - dm_io(&io_req, ms->nr_mirrors, io, &error_bits); + dm_io(&io_req, ms->nr_mirrors, io, &error_bits, IOPRIO_DEFAULT); if (unlikely(error_bits != 0)) { for (i = 0; i < ms->nr_mirrors; i++) if (test_bit(i, &error_bits)) @@ -543,7 +543,7 @@ static void read_async_bio(struct mirror *m, struct bio *bio) map_region(&io, m, bio); bio_set_m(bio, m); - BUG_ON(dm_io(&io_req, 1, &io, NULL)); + BUG_ON(dm_io(&io_req, 1, &io, NULL, IOPRIO_DEFAULT)); } static inline int region_in_sync(struct mirror_set *ms, region_t region, @@ -670,7 +670,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) */ bio_set_m(bio, get_default_mirror(ms)); - BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL)); + BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL, IOPRIO_DEFAULT)); } static void do_writes(struct mirror_set *ms, struct bio_list *writes) @@ -902,7 +902,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, if (IS_ERR(ms->io_client)) { ti->error = "Error creating dm_io client"; kfree(ms); - return NULL; + return NULL; } ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord, diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 80b95746a43e..eee1cd3aa3fc 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -220,7 +220,7 @@ static void do_metadata(struct work_struct *work) { struct mdata_req *req = container_of(work, struct mdata_req, work); - req->result = dm_io(req->io_req, 1, req->where, NULL); + req->result = dm_io(req->io_req, 1, req->where, NULL, IOPRIO_DEFAULT); } /* @@ -244,7 +244,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, blk_opf_t opf, struct mdata_req req; if (!metadata) - return dm_io(&io_req, 1, &where, NULL); + return dm_io(&io_req, 1, &where, NULL, IOPRIO_DEFAULT); req.where = &where; req.io_req = &io_req; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index b748901a4fb5..1c601508ce0b 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -679,8 +679,10 @@ static void dm_exception_table_exit(struct dm_exception_table *et, for (i = 0; i < size; i++) { slot = et->table + i; - hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) + hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) { kmem_cache_free(mem, ex); + cond_resched(); + } } kvfree(et->table); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index dac6a5f25f2b..aabb2435070b 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -72,7 +72,7 @@ static sector_t high(struct dm_table *t, unsigned int l, unsigned int n) n = get_child(n, CHILDREN_PER_NODE - 1); if (n >= t->counts[l]) - return (sector_t) - 1; + return (sector_t) -1; return get_node(t, l, n)[KEYS_PER_NODE - 1]; } @@ -128,7 +128,12 @@ static int alloc_targets(struct dm_table *t, unsigned int num) int dm_table_create(struct dm_table **result, fmode_t mode, unsigned int num_targets, struct mapped_device *md) { - struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); + struct dm_table *t; + + if (num_targets > DM_MAX_TARGETS) + return -EOVERFLOW; + + t = kzalloc(sizeof(*t), GFP_KERNEL); if (!t) return -ENOMEM; @@ -143,7 +148,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode, if (!num_targets) { kfree(t); - return -ENOMEM; + return -EOVERFLOW; } if (alloc_targets(t, num_targets)) { @@ -1528,7 +1533,7 @@ static bool dm_table_any_dev_attr(struct dm_table *t, if (ti->type->iterate_devices && ti->type->iterate_devices(ti, func, data)) return true; - } + } return false; } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 601f9e4e6234..f24d89af7c5f 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1179,9 +1179,9 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) discard_parent = bio_alloc(NULL, 1, 0, GFP_NOIO); discard_parent->bi_end_io = passdown_endio; discard_parent->bi_private = m; - if (m->maybe_shared) - passdown_double_checking_shared_status(m, discard_parent); - else { + if (m->maybe_shared) + passdown_double_checking_shared_status(m, discard_parent); + else { struct discard_op op; begin_discard(&op, tc, discard_parent); diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 24df610a2c43..6a707b41dc86 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -474,6 +474,63 @@ int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, return 0; } +static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io, + u8 *data, size_t len) +{ + memcpy(data, io->recheck_buffer, len); + io->recheck_buffer += len; + + return 0; +} + +static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter start, sector_t cur_block) +{ + struct page *page; + void *buffer; + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + + page = mempool_alloc(&v->recheck_pool, GFP_NOIO); + buffer = page_to_virt(page); + + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = v->io; + io_loc.bdev = v->data_dev->bdev; + io_loc.sector = cur_block << (v->data_dev_block_bits - SECTOR_SHIFT); + io_loc.count = 1 << (v->data_dev_block_bits - SECTOR_SHIFT); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); + if (unlikely(r)) + goto free_ret; + + r = verity_hash(v, verity_io_hash_req(v, io), buffer, + 1 << v->data_dev_block_bits, + verity_io_real_digest(v, io), true); + if (unlikely(r)) + goto free_ret; + + if (memcmp(verity_io_real_digest(v, io), + verity_io_want_digest(v, io), v->digest_size)) { + r = -EIO; + goto free_ret; + } + + io->recheck_buffer = buffer; + r = verity_for_bv_block(v, io, &start, verity_recheck_copy); + if (unlikely(r)) + goto free_ret; + + r = 0; +free_ret: + mempool_free(page, &v->recheck_pool); + + return r; +} + static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, u8 *data, size_t len) { @@ -500,9 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io) { bool is_zero; struct dm_verity *v = io->v; -#if defined(CONFIG_DM_VERITY_FEC) struct bvec_iter start; -#endif struct bvec_iter iter_copy; struct bvec_iter *iter; struct crypto_wait wait; @@ -553,10 +608,7 @@ static int verity_verify_io(struct dm_verity_io *io) if (unlikely(r < 0)) return r; -#if defined(CONFIG_DM_VERITY_FEC) - if (verity_fec_is_enabled(v)) - start = *iter; -#endif + start = *iter; r = verity_for_io_block(v, io, iter, &wait); if (unlikely(r < 0)) return r; @@ -578,6 +630,10 @@ static int verity_verify_io(struct dm_verity_io *io) * tasklet since it may sleep, so fallback to work-queue. */ return -EAGAIN; + } else if (verity_recheck(v, io, start, cur_block) == 0) { + if (v->validated_blocks) + set_bit(cur_block, v->validated_blocks); + continue; #if defined(CONFIG_DM_VERITY_FEC) } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, cur_block, NULL, &start) == 0) { @@ -634,23 +690,6 @@ static void verity_work(struct work_struct *w) verity_finish_io(io, errno_to_blk_status(verity_verify_io(io))); } -static void verity_tasklet(unsigned long data) -{ - struct dm_verity_io *io = (struct dm_verity_io *)data; - int err; - - io->in_tasklet = true; - err = verity_verify_io(io); - if (err == -EAGAIN || err == -ENOMEM) { - /* fallback to retrying with work-queue */ - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); - return; - } - - verity_finish_io(io, errno_to_blk_status(err)); -} - static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; @@ -663,13 +702,8 @@ static void verity_end_io(struct bio *bio) return; } - if (static_branch_unlikely(&use_tasklet_enabled) && io->v->use_tasklet) { - tasklet_init(&io->tasklet, verity_tasklet, (unsigned long)io); - tasklet_schedule(&io->tasklet); - } else { - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); - } + INIT_WORK(&io->work, verity_work); + queue_work(io->v->verify_wq, &io->work); } /* @@ -950,6 +984,10 @@ static void verity_dtr(struct dm_target *ti) if (v->verify_wq) destroy_workqueue(v->verify_wq); + mempool_exit(&v->recheck_pool); + if (v->io) + dm_io_client_destroy(v->io); + if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -1386,6 +1424,20 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv) } v->hash_blocks = hash_position; + r = mempool_init_page_pool(&v->recheck_pool, 1, 0); + if (unlikely(r)) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + + v->io = dm_io_client_create(); + if (IS_ERR(v->io)) { + r = PTR_ERR(v->io); + v->io = NULL; + ti->error = "Cannot allocate dm io"; + goto bad; + } + v->bufio = dm_bufio_client_create(v->hash_dev->bdev, 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux), dm_bufio_alloc_callback, NULL, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f9d522c870e6..db93a91169d5 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -11,6 +11,7 @@ #ifndef DM_VERITY_H #define DM_VERITY_H +#include #include #include #include @@ -68,6 +69,9 @@ struct dm_verity { unsigned long *validated_blocks; /* bitset blocks validated */ char *signature_key_desc; /* signature keyring reference */ + + struct dm_io_client *io; + mempool_t recheck_pool; }; struct dm_verity_io { @@ -76,14 +80,15 @@ struct dm_verity_io { /* original value of bio->bi_end_io */ bio_end_io_t *orig_bi_end_io; + struct bvec_iter iter; + sector_t block; unsigned int n_blocks; bool in_tasklet; - struct bvec_iter iter; - struct work_struct work; - struct tasklet_struct tasklet; + + char *recheck_buffer; /* * Three variably-size fields follow this struct: diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index c6ff43a8f0b2..20fc84b24fc7 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -531,7 +531,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) req.notify.context = &endio; /* writing via async dm-io (implied by notify.fn above) won't return an error */ - (void) dm_io(&req, 1, ®ion, NULL); + (void) dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); i = j; } @@ -568,7 +568,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc) req.notify.fn = NULL; req.notify.context = NULL; - r = dm_io(&req, 1, ®ion, NULL); + r = dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); if (unlikely(r)) writecache_error(wc, r, "error writing superblock"); } @@ -596,7 +596,7 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) req.client = wc->dm_io; req.notify.fn = NULL; - r = dm_io(&req, 1, ®ion, NULL); + r = dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); if (unlikely(r)) writecache_error(wc, r, "error flushing metadata: %d", r); } @@ -984,7 +984,7 @@ static int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors req.client = wc->dm_io; req.notify.fn = NULL; - return dm_io(&req, 1, ®ion, NULL); + return dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); } static void writecache_resume(struct dm_target *ti) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0ec85d159bcd..29270f6f272f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2897,6 +2897,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned int suspend static void __dm_internal_resume(struct mapped_device *md) { + int r; + struct dm_table *map; + BUG_ON(!md->internal_suspend_count); if (--md->internal_suspend_count) @@ -2905,12 +2908,23 @@ static void __dm_internal_resume(struct mapped_device *md) if (dm_suspended_md(md)) goto done; /* resume from nested suspend */ - /* - * NOTE: existing callers don't need to call dm_table_resume_targets - * (which may fail -- so best to avoid it for now by passing NULL map) - */ - (void) __dm_resume(md, NULL); - + map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); + r = __dm_resume(md, map); + if (r) { + /* + * If a preresume method of some target failed, we are in a + * tricky situation. We can't return an error to the caller. We + * can't fake success because then the "resume" and + * "postsuspend" methods would not be paired correctly, and it + * would break various targets, for example it would cause list + * corruption in the "origin" target. + * + * So, we fake normal suspend here, to make sure that the + * "resume" and "postsuspend" methods will be paired correctly. + */ + DMERR("Preresume method failed: %d", r); + set_bit(DMF_SUSPENDED, &md->flags); + } done: clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); smp_mb__after_atomic(); diff --git a/drivers/md/md.c b/drivers/md/md.c index 6120f26a7969..788acc81e7a8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -564,8 +564,12 @@ static void submit_flushes(struct work_struct *ws) rcu_read_lock(); } rcu_read_unlock(); - if (atomic_dec_and_test(&mddev->flush_pending)) + if (atomic_dec_and_test(&mddev->flush_pending)) { + /* The pair is percpu_ref_get() from md_flush_request() */ + percpu_ref_put(&mddev->active_io); + queue_work(md_wq, &mddev->flush_work); + } } static void md_submit_flush_data(struct work_struct *ws) @@ -963,9 +967,10 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, return; bio = bio_alloc_bioset(rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev, - 1, - REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA, - GFP_NOIO, &mddev->sync_set); + 1, + REQ_OP_WRITE | REQ_SYNC | REQ_IDLE | REQ_META + | REQ_PREFLUSH | REQ_FUA, + GFP_NOIO, &mddev->sync_set); atomic_inc(&rdev->nr_pending); @@ -1145,6 +1150,7 @@ struct super_type { struct md_rdev *refdev, int minor_version); int (*validate_super)(struct mddev *mddev, + struct md_rdev *freshest, struct md_rdev *rdev); void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); @@ -1282,8 +1288,9 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor /* * validate_super for 0.90.0 + * note: we are not using "freshest" for 0.9 superblock */ -static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) +static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev) { mdp_disk_t *desc; mdp_super_t *sb = page_address(rdev->sb_page); @@ -1795,7 +1802,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ return ret; } -static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) +static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev) { struct mdp_superblock_1 *sb = page_address(rdev->sb_page); __u64 ev1 = le64_to_cpu(sb->events); @@ -1891,13 +1898,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } } else if (mddev->pers == NULL) { /* Insist of good event counter while assembling, except for - * spares (which don't need an event count) */ - ++ev1; + * spares (which don't need an event count). + * Similar to mdadm, we allow event counter difference of 1 + * from the freshest device. + */ if (rdev->desc_nr >= 0 && rdev->desc_nr < le32_to_cpu(sb->max_dev) && (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX || le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)) - if (ev1 < mddev->events) + if (ev1 + 1 < mddev->events) return -EINVAL; } else if (mddev->bitmap) { /* If adding to array with a bitmap, then we can accept an @@ -1918,8 +1927,38 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { role = MD_DISK_ROLE_SPARE; rdev->desc_nr = -1; - } else + } else if (mddev->pers == NULL && freshest && ev1 < mddev->events) { + /* + * If we are assembling, and our event counter is smaller than the + * highest event counter, we cannot trust our superblock about the role. + * It could happen that our rdev was marked as Faulty, and all other + * superblocks were updated with +1 event counter. + * Then, before the next superblock update, which typically happens when + * remove_and_add_spares() removes the device from the array, there was + * a crash or reboot. + * If we allow current rdev without consulting the freshest superblock, + * we could cause data corruption. + * Note that in this case our event counter is smaller by 1 than the + * highest, otherwise, this rdev would not be allowed into array; + * both kernel and mdadm allow event counter difference of 1. + */ + struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page); + u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev); + + if (rdev->desc_nr >= freshest_max_dev) { + /* this is unexpected, better not proceed */ + pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n", + mdname(mddev), rdev->bdev, rdev->desc_nr, + freshest->bdev, freshest_max_dev); + return -EUCLEAN; + } + + role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]); + pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n", + mdname(mddev), rdev->bdev, role, role, freshest->bdev); + } else { role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); + } switch(role) { case MD_DISK_ROLE_SPARE: /* spare */ break; @@ -2861,7 +2900,7 @@ static int add_bound_rdev(struct md_rdev *rdev) * and should be added immediately. */ super_types[mddev->major_version]. - validate_super(mddev, rdev); + validate_super(mddev, NULL/*freshest*/, rdev); if (add_journal) mddev_suspend(mddev); err = mddev->pers->hot_add_disk(mddev, rdev); @@ -3775,7 +3814,7 @@ static int analyze_sbs(struct mddev *mddev) } super_types[mddev->major_version]. - validate_super(mddev, freshest); + validate_super(mddev, NULL/*freshest*/, freshest); i = 0; rdev_for_each_safe(rdev, tmp, mddev) { @@ -3790,7 +3829,7 @@ static int analyze_sbs(struct mddev *mddev) } if (rdev != freshest) { if (super_types[mddev->major_version]. - validate_super(mddev, rdev)) { + validate_super(mddev, freshest, rdev)) { pr_warn("md: kicking non-fresh %pg from array!\n", rdev->bdev); md_kick_rdev_from_array(rdev); @@ -4864,11 +4903,21 @@ action_store(struct mddev *mddev, const char *page, size_t len) return -EINVAL; err = mddev_lock(mddev); if (!err) { - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { err = -EBUSY; - else { + } else if (mddev->reshape_position == MaxSector || + mddev->pers->check_reshape == NULL || + mddev->pers->check_reshape(mddev)) { clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); err = mddev->pers->start_reshape(mddev); + } else { + /* + * If reshape is still in progress, and + * md_check_recovery() can continue to reshape, + * don't restart reshape because data can be + * corrupted for raid456. + */ + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); } mddev_unlock(mddev); } @@ -6194,7 +6243,15 @@ static void md_clean(struct mddev *mddev) mddev->persistent = 0; mddev->level = LEVEL_NONE; mddev->clevel[0] = 0; - mddev->flags = 0; + /* + * Don't clear MD_CLOSING, or mddev can be opened again. + * 'hold_active != 0' means mddev is still in the creation + * process and will be used later. + */ + if (mddev->hold_active) + mddev->flags = 0; + else + mddev->flags &= BIT_ULL_MASK(MD_CLOSING); mddev->sb_flags = 0; mddev->ro = MD_RDWR; mddev->metadata_type[0] = 0; @@ -6804,7 +6861,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) rdev->saved_raid_disk = rdev->raid_disk; } else super_types[mddev->major_version]. - validate_super(mddev, rdev); + validate_super(mddev, NULL/*freshest*/, rdev); if ((info->state & (1<raid_disk != info->raid_disk) { /* This was a hot-add request, but events doesn't @@ -7522,7 +7579,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, int err = 0; void __user *argp = (void __user *)arg; struct mddev *mddev = NULL; - bool did_set_md_closing = false; if (!md_ioctl_valid(cmd)) return -ENOTTY; @@ -7609,7 +7665,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EBUSY; goto out; } - did_set_md_closing = true; mutex_unlock(&mddev->open_mutex); sync_blockdev(bdev); } @@ -7772,7 +7827,7 @@ unlock: mddev->hold_active = 0; mddev_unlock(mddev); out: - if(did_set_md_closing) + if (cmd == STOP_ARRAY_RO || (err && cmd == STOP_ARRAY)) clear_bit(MD_CLOSING, &mddev->flags); return err; } diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 1cc783d7030d..18d949d63543 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -726,7 +726,7 @@ static int shadow_child(struct dm_btree_info *info, struct dm_btree_value_type * * nodes, so saves metadata space. */ static int split_two_into_three(struct shadow_spine *s, unsigned int parent_index, - struct dm_btree_value_type *vt, uint64_t key) + struct dm_btree_value_type *vt, uint64_t key) { int r; unsigned int middle_index; @@ -781,7 +781,7 @@ static int split_two_into_three(struct shadow_spine *s, unsigned int parent_inde if (shadow_current(s) != right) unlock_block(s->info, right); - return r; + return r; } @@ -1216,7 +1216,7 @@ int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root, static bool need_insert(struct btree_node *node, uint64_t *keys, unsigned int level, unsigned int index) { - return ((index >= le32_to_cpu(node->header.nr_entries)) || + return ((index >= le32_to_cpu(node->header.nr_entries)) || (le64_to_cpu(node->keys[index]) != keys[level])); } diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index af800efed9f3..4833a3998c1d 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -390,7 +390,7 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, } int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, - dm_block_t begin, dm_block_t end, dm_block_t *b) + dm_block_t begin, dm_block_t end, dm_block_t *b) { int r; uint32_t count; diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index 706ceb85d680..63d9a72e3265 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -120,7 +120,7 @@ int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result); int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, dm_block_t end, dm_block_t *result); int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, - dm_block_t begin, dm_block_t end, dm_block_t *result); + dm_block_t begin, dm_block_t end, dm_block_t *result); /* * The next three functions return (via nr_allocations) the net number of diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7b318e7e8d45..009f7ffe4e10 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -920,6 +920,7 @@ static void flush_pending_writes(struct r10conf *conf) raid1_submit_write(bio); bio = next; + cond_resched(); } blk_finish_plug(&plug); } else @@ -1130,6 +1131,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) raid1_submit_write(bio); bio = next; + cond_resched(); } kfree(plug); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e4564ca1f243..8cf2317857e0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2420,7 +2420,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) atomic_inc(&conf->active_stripes); raid5_release_stripe(sh); - conf->max_nr_stripes++; + WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes + 1); return 1; } @@ -2717,7 +2717,7 @@ static int drop_one_stripe(struct r5conf *conf) shrink_buffers(sh); free_stripe(conf->slab_cache, sh); atomic_dec(&conf->active_stripes); - conf->max_nr_stripes--; + WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes - 1); return 1; } @@ -6891,7 +6891,7 @@ raid5_set_cache_size(struct mddev *mddev, int size) if (size <= 16 || size > 32768) return -EINVAL; - conf->min_nr_stripes = size; + WRITE_ONCE(conf->min_nr_stripes, size); mutex_lock(&conf->cache_size_mutex); while (size < conf->max_nr_stripes && drop_one_stripe(conf)) @@ -6903,7 +6903,7 @@ raid5_set_cache_size(struct mddev *mddev, int size) mutex_lock(&conf->cache_size_mutex); while (size > conf->max_nr_stripes) if (!grow_one_stripe(conf, GFP_KERNEL)) { - conf->min_nr_stripes = conf->max_nr_stripes; + WRITE_ONCE(conf->min_nr_stripes, conf->max_nr_stripes); result = -ENOMEM; break; } @@ -7468,11 +7468,13 @@ static unsigned long raid5_cache_count(struct shrinker *shrink, struct shrink_control *sc) { struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); + int max_stripes = READ_ONCE(conf->max_nr_stripes); + int min_stripes = READ_ONCE(conf->min_nr_stripes); - if (conf->max_nr_stripes < conf->min_nr_stripes) + if (max_stripes < min_stripes) /* unlikely, but not impossible */ return 0; - return conf->max_nr_stripes - conf->min_nr_stripes; + return max_stripes - min_stripes; } static struct r5conf *setup_conf(struct mddev *mddev) diff --git a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c index 303d02b1d71c..fe30f5b0050d 100644 --- a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c +++ b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c @@ -113,6 +113,7 @@ int tpg_alloc(struct tpg_data *tpg, unsigned max_w) { unsigned pat; unsigned plane; + int ret = 0; tpg->max_line_width = max_w; for (pat = 0; pat < TPG_MAX_PAT_LINES; pat++) { @@ -121,14 +122,18 @@ int tpg_alloc(struct tpg_data *tpg, unsigned max_w) tpg->lines[pat][plane] = vzalloc(array3_size(max_w, 2, pixelsz)); - if (!tpg->lines[pat][plane]) - return -ENOMEM; + if (!tpg->lines[pat][plane]) { + ret = -ENOMEM; + goto free_lines; + } if (plane == 0) continue; tpg->downsampled_lines[pat][plane] = vzalloc(array3_size(max_w, 2, pixelsz)); - if (!tpg->downsampled_lines[pat][plane]) - return -ENOMEM; + if (!tpg->downsampled_lines[pat][plane]) { + ret = -ENOMEM; + goto free_lines; + } } } for (plane = 0; plane < TPG_MAX_PLANES; plane++) { @@ -136,18 +141,45 @@ int tpg_alloc(struct tpg_data *tpg, unsigned max_w) tpg->contrast_line[plane] = vzalloc(array_size(pixelsz, max_w)); - if (!tpg->contrast_line[plane]) - return -ENOMEM; + if (!tpg->contrast_line[plane]) { + ret = -ENOMEM; + goto free_contrast_line; + } tpg->black_line[plane] = vzalloc(array_size(pixelsz, max_w)); - if (!tpg->black_line[plane]) - return -ENOMEM; + if (!tpg->black_line[plane]) { + ret = -ENOMEM; + goto free_contrast_line; + } tpg->random_line[plane] = vzalloc(array3_size(max_w, 2, pixelsz)); - if (!tpg->random_line[plane]) - return -ENOMEM; + if (!tpg->random_line[plane]) { + ret = -ENOMEM; + goto free_contrast_line; + } } return 0; + +free_contrast_line: + for (plane = 0; plane < TPG_MAX_PLANES; plane++) { + vfree(tpg->contrast_line[plane]); + vfree(tpg->black_line[plane]); + vfree(tpg->random_line[plane]); + tpg->contrast_line[plane] = NULL; + tpg->black_line[plane] = NULL; + tpg->random_line[plane] = NULL; + } +free_lines: + for (pat = 0; pat < TPG_MAX_PAT_LINES; pat++) + for (plane = 0; plane < TPG_MAX_PLANES; plane++) { + vfree(tpg->lines[pat][plane]); + tpg->lines[pat][plane] = NULL; + if (plane == 0) + continue; + vfree(tpg->downsampled_lines[pat][plane]); + tpg->downsampled_lines[pat][plane] = NULL; + } + return ret; } EXPORT_SYMBOL_GPL(tpg_alloc); diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c b/drivers/media/common/videobuf2/videobuf2-dma-sg.c index fa69158a65b1..cb1e4bc69dba 100644 --- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c +++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c @@ -494,9 +494,15 @@ vb2_dma_sg_dmabuf_ops_end_cpu_access(struct dma_buf *dbuf, static int vb2_dma_sg_dmabuf_ops_vmap(struct dma_buf *dbuf, struct iosys_map *map) { - struct vb2_dma_sg_buf *buf = dbuf->priv; + struct vb2_dma_sg_buf *buf; + void *vaddr; - iosys_map_set_vaddr(map, buf->vaddr); + buf = dbuf->priv; + vaddr = vb2_dma_sg_vaddr(buf->vb, buf); + if (!vaddr) + return -EINVAL; + + iosys_map_set_vaddr(map, vaddr); return 0; } diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index d352e028491a..aefee2277254 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ b/drivers/media/dvb-core/dvbdev.c @@ -494,6 +494,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL); if (!dvbdevfops) { kfree(dvbdev); + *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } @@ -502,6 +503,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, if (!new_node) { kfree(dvbdevfops); kfree(dvbdev); + *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } @@ -535,6 +537,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, } list_del (&dvbdev->list_head); kfree(dvbdev); + *pdvbdev = NULL; up_write(&minor_rwsem); mutex_unlock(&dvbdev_register_lock); return -EINVAL; @@ -557,6 +560,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, dvb_media_device_free(dvbdev); list_del (&dvbdev->list_head); kfree(dvbdev); + *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return ret; } @@ -575,6 +579,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, dvb_media_device_free(dvbdev); list_del (&dvbdev->list_head); kfree(dvbdev); + *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return PTR_ERR(clsdev); } diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c index 04556b77c16c..0977564a4a1a 100644 --- a/drivers/media/dvb-frontends/stv0367.c +++ b/drivers/media/dvb-frontends/stv0367.c @@ -118,50 +118,32 @@ static const s32 stv0367cab_RF_LookUp2[RF_LOOKUP_TABLE2_SIZE][RF_LOOKUP_TABLE2_S } }; -static -int stv0367_writeregs(struct stv0367_state *state, u16 reg, u8 *data, int len) +static noinline_for_stack +int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data) { - u8 buf[MAX_XFER_SIZE]; + u8 buf[3] = { MSB(reg), LSB(reg), data }; struct i2c_msg msg = { .addr = state->config->demod_address, .flags = 0, .buf = buf, - .len = len + 2 + .len = 3, }; int ret; - if (2 + len > sizeof(buf)) { - printk(KERN_WARNING - "%s: i2c wr reg=%04x: len=%d is too big!\n", - KBUILD_MODNAME, reg, len); - return -EINVAL; - } - - - buf[0] = MSB(reg); - buf[1] = LSB(reg); - memcpy(buf + 2, data, len); - if (i2cdebug) printk(KERN_DEBUG "%s: [%02x] %02x: %02x\n", __func__, - state->config->demod_address, reg, buf[2]); + state->config->demod_address, reg, data); ret = i2c_transfer(state->i2c, &msg, 1); if (ret != 1) printk(KERN_ERR "%s: i2c write error! ([%02x] %02x: %02x)\n", - __func__, state->config->demod_address, reg, buf[2]); + __func__, state->config->demod_address, reg, data); return (ret != 1) ? -EREMOTEIO : 0; } -static int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data) -{ - u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ - - return stv0367_writeregs(state, reg, &tmp, 1); -} - -static u8 stv0367_readreg(struct stv0367_state *state, u16 reg) +static noinline_for_stack +u8 stv0367_readreg(struct stv0367_state *state, u16 reg) { u8 b0[] = { 0, 0 }; u8 b1[] = { 0 }; diff --git a/drivers/media/i2c/imx355.c b/drivers/media/i2c/imx355.c index b46178681c05..7fb02ba56012 100644 --- a/drivers/media/i2c/imx355.c +++ b/drivers/media/i2c/imx355.c @@ -1784,10 +1784,6 @@ static int imx355_probe(struct i2c_client *client) goto error_handler_free; } - ret = v4l2_async_register_subdev_sensor(&imx355->sd); - if (ret < 0) - goto error_media_entity; - /* * Device is already turned on by i2c-core with ACPI domain PM. * Enable runtime PM and turn off the device. @@ -1796,9 +1792,15 @@ static int imx355_probe(struct i2c_client *client) pm_runtime_enable(&client->dev); pm_runtime_idle(&client->dev); + ret = v4l2_async_register_subdev_sensor(&imx355->sd); + if (ret < 0) + goto error_media_entity_runtime_pm; + return 0; -error_media_entity: +error_media_entity_runtime_pm: + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); media_entity_cleanup(&imx355->sd.entity); error_handler_free: diff --git a/drivers/media/i2c/ov13b10.c b/drivers/media/i2c/ov13b10.c index 549e5d93e568..368a3c2bfe34 100644 --- a/drivers/media/i2c/ov13b10.c +++ b/drivers/media/i2c/ov13b10.c @@ -589,6 +589,9 @@ struct ov13b10 { /* Streaming on/off */ bool streaming; + + /* True if the device has been identified */ + bool identified; }; #define to_ov13b10(_sd) container_of(_sd, struct ov13b10, sd) @@ -1023,12 +1026,42 @@ ov13b10_set_pad_format(struct v4l2_subdev *sd, return 0; } +/* Verify chip ID */ +static int ov13b10_identify_module(struct ov13b10 *ov13b) +{ + struct i2c_client *client = v4l2_get_subdevdata(&ov13b->sd); + int ret; + u32 val; + + if (ov13b->identified) + return 0; + + ret = ov13b10_read_reg(ov13b, OV13B10_REG_CHIP_ID, + OV13B10_REG_VALUE_24BIT, &val); + if (ret) + return ret; + + if (val != OV13B10_CHIP_ID) { + dev_err(&client->dev, "chip id mismatch: %x!=%x\n", + OV13B10_CHIP_ID, val); + return -EIO; + } + + ov13b->identified = true; + + return 0; +} + static int ov13b10_start_streaming(struct ov13b10 *ov13b) { struct i2c_client *client = v4l2_get_subdevdata(&ov13b->sd); const struct ov13b10_reg_list *reg_list; int ret, link_freq_index; + ret = ov13b10_identify_module(ov13b); + if (ret) + return ret; + /* Get out of from software reset */ ret = ov13b10_write_reg(ov13b, OV13B10_REG_SOFTWARE_RST, OV13B10_REG_VALUE_08BIT, OV13B10_SOFTWARE_RST); @@ -1144,27 +1177,6 @@ error: return ret; } -/* Verify chip ID */ -static int ov13b10_identify_module(struct ov13b10 *ov13b) -{ - struct i2c_client *client = v4l2_get_subdevdata(&ov13b->sd); - int ret; - u32 val; - - ret = ov13b10_read_reg(ov13b, OV13B10_REG_CHIP_ID, - OV13B10_REG_VALUE_24BIT, &val); - if (ret) - return ret; - - if (val != OV13B10_CHIP_ID) { - dev_err(&client->dev, "chip id mismatch: %x!=%x\n", - OV13B10_CHIP_ID, val); - return -EIO; - } - - return 0; -} - static const struct v4l2_subdev_video_ops ov13b10_video_ops = { .s_stream = ov13b10_set_stream, }; @@ -1379,6 +1391,7 @@ out_err: static int ov13b10_probe(struct i2c_client *client) { struct ov13b10 *ov13b; + bool full_power; int ret; /* Check HW config */ @@ -1395,11 +1408,14 @@ static int ov13b10_probe(struct i2c_client *client) /* Initialize subdev */ v4l2_i2c_subdev_init(&ov13b->sd, client, &ov13b10_subdev_ops); - /* Check module identity */ - ret = ov13b10_identify_module(ov13b); - if (ret) { - dev_err(&client->dev, "failed to find sensor: %d\n", ret); - return ret; + full_power = acpi_dev_state_d0(&client->dev); + if (full_power) { + /* Check module identity */ + ret = ov13b10_identify_module(ov13b); + if (ret) { + dev_err(&client->dev, "failed to find sensor: %d\n", ret); + return ret; + } } /* Set default mode to max resolution */ @@ -1423,21 +1439,27 @@ static int ov13b10_probe(struct i2c_client *client) goto error_handler_free; } - ret = v4l2_async_register_subdev_sensor(&ov13b->sd); - if (ret < 0) - goto error_media_entity; /* * Device is already turned on by i2c-core with ACPI domain PM. * Enable runtime PM and turn off the device. */ - pm_runtime_set_active(&client->dev); + /* Set the device's state to active if it's in D0 state. */ + if (full_power) + pm_runtime_set_active(&client->dev); pm_runtime_enable(&client->dev); pm_runtime_idle(&client->dev); + ret = v4l2_async_register_subdev_sensor(&ov13b->sd); + if (ret < 0) + goto error_media_entity_runtime_pm; + return 0; -error_media_entity: +error_media_entity_runtime_pm: + pm_runtime_disable(&client->dev); + if (full_power) + pm_runtime_set_suspended(&client->dev); media_entity_cleanup(&ov13b->sd.entity); error_handler_free: @@ -1457,6 +1479,7 @@ static void ov13b10_remove(struct i2c_client *client) ov13b10_free_controls(ov13b); pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); } static const struct dev_pm_ops ov13b10_pm_ops = { @@ -1480,6 +1503,7 @@ static struct i2c_driver ov13b10_i2c_driver = { }, .probe_new = ov13b10_probe, .remove = ov13b10_remove, + .flags = I2C_DRV_ACPI_WAIVE_D0_PROBE, }; module_i2c_driver(ov13b10_i2c_driver); diff --git a/drivers/media/i2c/ov9734.c b/drivers/media/i2c/ov9734.c index 8b0a158cb297..c5592ddb0cff 100644 --- a/drivers/media/i2c/ov9734.c +++ b/drivers/media/i2c/ov9734.c @@ -939,6 +939,7 @@ static void ov9734_remove(struct i2c_client *client) media_entity_cleanup(&sd->entity); v4l2_ctrl_handler_free(sd->ctrl_handler); pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); mutex_destroy(&ov9734->mutex); } @@ -984,13 +985,6 @@ static int ov9734_probe(struct i2c_client *client) goto probe_error_v4l2_ctrl_handler_free; } - ret = v4l2_async_register_subdev_sensor(&ov9734->sd); - if (ret < 0) { - dev_err(&client->dev, "failed to register V4L2 subdev: %d", - ret); - goto probe_error_media_entity_cleanup; - } - /* * Device is already turned on by i2c-core with ACPI domain PM. * Enable runtime PM and turn off the device. @@ -999,9 +993,18 @@ static int ov9734_probe(struct i2c_client *client) pm_runtime_enable(&client->dev); pm_runtime_idle(&client->dev); + ret = v4l2_async_register_subdev_sensor(&ov9734->sd); + if (ret < 0) { + dev_err(&client->dev, "failed to register V4L2 subdev: %d", + ret); + goto probe_error_media_entity_cleanup_pm; + } + return 0; -probe_error_media_entity_cleanup: +probe_error_media_entity_cleanup_pm: + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); media_entity_cleanup(&ov9734->sd.entity); probe_error_v4l2_ctrl_handler_free: diff --git a/drivers/media/i2c/tc358743.c b/drivers/media/i2c/tc358743.c index 200841c1f5cf..68628ccecd16 100644 --- a/drivers/media/i2c/tc358743.c +++ b/drivers/media/i2c/tc358743.c @@ -2094,9 +2094,6 @@ static int tc358743_probe(struct i2c_client *client) state->mbus_fmt_code = MEDIA_BUS_FMT_RGB888_1X24; sd->dev = &client->dev; - err = v4l2_async_register_subdev(sd); - if (err < 0) - goto err_hdl; mutex_init(&state->confctl_mutex); @@ -2154,6 +2151,10 @@ static int tc358743_probe(struct i2c_client *client) if (err) goto err_work_queues; + err = v4l2_async_register_subdev(sd); + if (err < 0) + goto err_work_queues; + v4l2_info(sd, "%s found @ 0x%x (%s)\n", client->name, client->addr << 1, client->adapter->name); diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index f268cf66053e..8919df09e3e8 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -509,14 +509,15 @@ static int media_pipeline_walk_push(struct media_pipeline_walk *walk, /* * Move the top entry link cursor to the next link. If all links of the entry - * have been visited, pop the entry itself. + * have been visited, pop the entry itself. Return true if the entry has been + * popped. */ -static void media_pipeline_walk_pop(struct media_pipeline_walk *walk) +static bool media_pipeline_walk_pop(struct media_pipeline_walk *walk) { struct media_pipeline_walk_entry *entry; if (WARN_ON(walk->stack.top < 0)) - return; + return false; entry = media_pipeline_walk_top(walk); @@ -526,7 +527,7 @@ static void media_pipeline_walk_pop(struct media_pipeline_walk *walk) walk->stack.top); walk->stack.top--; - return; + return true; } entry->links = entry->links->next; @@ -534,6 +535,8 @@ static void media_pipeline_walk_pop(struct media_pipeline_walk *walk) dev_dbg(walk->mdev->dev, "media pipeline: moved entry %u to next link\n", walk->stack.top); + + return false; } /* Free all memory allocated while walking the pipeline. */ @@ -579,30 +582,24 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, struct media_pipeline_walk *walk) { struct media_pipeline_walk_entry *entry = media_pipeline_walk_top(walk); - struct media_pad *pad; + struct media_pad *origin; struct media_link *link; struct media_pad *local; struct media_pad *remote; + bool last_link; int ret; - pad = entry->pad; + origin = entry->pad; link = list_entry(entry->links, typeof(*link), list); - media_pipeline_walk_pop(walk); + last_link = media_pipeline_walk_pop(walk); dev_dbg(walk->mdev->dev, "media pipeline: exploring link '%s':%u -> '%s':%u\n", link->source->entity->name, link->source->index, link->sink->entity->name, link->sink->index); - /* Skip links that are not enabled. */ - if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { - dev_dbg(walk->mdev->dev, - "media pipeline: skipping link (disabled)\n"); - return 0; - } - /* Get the local pad and remote pad. */ - if (link->source->entity == pad->entity) { + if (link->source->entity == origin->entity) { local = link->source; remote = link->sink; } else { @@ -614,25 +611,64 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, * Skip links that originate from a different pad than the incoming pad * that is not connected internally in the entity to the incoming pad. */ - if (pad != local && - !media_entity_has_pad_interdep(pad->entity, pad->index, local->index)) { + if (origin != local && + !media_entity_has_pad_interdep(origin->entity, origin->index, + local->index)) { dev_dbg(walk->mdev->dev, "media pipeline: skipping link (no route)\n"); - return 0; + goto done; } /* - * Add the local and remote pads of the link to the pipeline and push - * them to the stack, if they're not already present. + * Add the local pad of the link to the pipeline and push it to the + * stack, if not already present. */ ret = media_pipeline_add_pad(pipe, walk, local); if (ret) return ret; + /* Similarly, add the remote pad, but only if the link is enabled. */ + if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { + dev_dbg(walk->mdev->dev, + "media pipeline: skipping link (disabled)\n"); + goto done; + } + ret = media_pipeline_add_pad(pipe, walk, remote); if (ret) return ret; +done: + /* + * If we're done iterating over links, iterate over pads of the entity. + * This is necessary to discover pads that are not connected with any + * link. Those are dead ends from a pipeline exploration point of view, + * but are still part of the pipeline and need to be added to enable + * proper validation. + */ + if (!last_link) + return 0; + + dev_dbg(walk->mdev->dev, + "media pipeline: adding unconnected pads of '%s'\n", + local->entity->name); + + media_entity_for_each_pad(origin->entity, local) { + /* + * Skip the origin pad (already handled), pad that have links + * (already discovered through iterating over links) and pads + * not internally connected. + */ + if (origin == local || !local->num_links || + !media_entity_has_pad_interdep(origin->entity, origin->index, + local->index)) + continue; + + ret = media_pipeline_add_pad(pipe, walk, local); + if (ret) + return ret; + } + return 0; } @@ -744,7 +780,6 @@ __must_check int __media_pipeline_start(struct media_pad *pad, struct media_pad *pad = ppad->pad; struct media_entity *entity = pad->entity; bool has_enabled_link = false; - bool has_link = false; struct media_link *link; dev_dbg(mdev->dev, "Validating pad '%s':%u\n", pad->entity->name, @@ -774,7 +809,6 @@ __must_check int __media_pipeline_start(struct media_pad *pad, /* Record if the pad has links and enabled links. */ if (link->flags & MEDIA_LNK_FL_ENABLED) has_enabled_link = true; - has_link = true; /* * Validate the link if it's enabled and has the @@ -812,7 +846,7 @@ __must_check int __media_pipeline_start(struct media_pad *pad, * 3. If the pad has the MEDIA_PAD_FL_MUST_CONNECT flag set, * ensure that it has either no link or an enabled link. */ - if ((pad->flags & MEDIA_PAD_FL_MUST_CONNECT) && has_link && + if ((pad->flags & MEDIA_PAD_FL_MUST_CONNECT) && !has_enabled_link) { dev_dbg(mdev->dev, "Pad '%s':%u must be connected by an enabled link\n", @@ -957,6 +991,9 @@ static void __media_entity_remove_link(struct media_entity *entity, /* Remove the reverse links for a data link. */ if ((link->flags & MEDIA_LNK_FL_LINK_TYPE) == MEDIA_LNK_FL_DATA_LINK) { + link->source->num_links--; + link->sink->num_links--; + if (link->source->entity == entity) remote = link->sink->entity; else @@ -1017,6 +1054,11 @@ media_create_pad_link(struct media_entity *source, u16 source_pad, struct media_link *link; struct media_link *backlink; + if (flags & MEDIA_LNK_FL_LINK_TYPE) + return -EINVAL; + + flags |= MEDIA_LNK_FL_DATA_LINK; + if (WARN_ON(!source || !sink) || WARN_ON(source_pad >= source->num_pads) || WARN_ON(sink_pad >= sink->num_pads)) @@ -1032,7 +1074,7 @@ media_create_pad_link(struct media_entity *source, u16 source_pad, link->source = &source->pads[source_pad]; link->sink = &sink->pads[sink_pad]; - link->flags = flags & ~MEDIA_LNK_FL_INTERFACE_LINK; + link->flags = flags; /* Initialize graph object embedded at the new link */ media_gobj_create(source->graph_obj.mdev, MEDIA_GRAPH_LINK, @@ -1063,6 +1105,9 @@ media_create_pad_link(struct media_entity *source, u16 source_pad, sink->num_links++; source->num_links++; + link->source->num_links++; + link->sink->num_links++; + return 0; } EXPORT_SYMBOL_GPL(media_create_pad_link); diff --git a/drivers/media/pci/ddbridge/ddbridge-main.c b/drivers/media/pci/ddbridge/ddbridge-main.c index 91733ab9f58c..363badab7cf0 100644 --- a/drivers/media/pci/ddbridge/ddbridge-main.c +++ b/drivers/media/pci/ddbridge/ddbridge-main.c @@ -238,7 +238,7 @@ fail: ddb_unmap(dev); pci_set_drvdata(pdev, NULL); pci_disable_device(pdev); - return -1; + return stat; } /****************************************************************************/ diff --git a/drivers/media/platform/amphion/vpu.h b/drivers/media/platform/amphion/vpu.h index deb2288d4290..4f3d23b55b6d 100644 --- a/drivers/media/platform/amphion/vpu.h +++ b/drivers/media/platform/amphion/vpu.h @@ -152,7 +152,6 @@ struct vpu_core { struct vpu_mbox tx_type; struct vpu_mbox tx_data; struct vpu_mbox rx; - unsigned long cmd_seq; wait_queue_head_t ack_wq; struct completion cmp; @@ -251,6 +250,8 @@ struct vpu_inst { struct list_head cmd_q; void *pending; + unsigned long cmd_seq; + atomic_long_t last_response_cmd; struct vpu_inst_ops *ops; const struct vpu_format *formats; diff --git a/drivers/media/platform/amphion/vpu_cmds.c b/drivers/media/platform/amphion/vpu_cmds.c index 235b71398d40..c68163af29dc 100644 --- a/drivers/media/platform/amphion/vpu_cmds.c +++ b/drivers/media/platform/amphion/vpu_cmds.c @@ -34,6 +34,7 @@ struct vpu_cmd_t { struct vpu_cmd_request *request; struct vpu_rpc_event *pkt; unsigned long key; + atomic_long_t *last_response_cmd; }; static struct vpu_cmd_request vpu_cmd_requests[] = { @@ -117,6 +118,8 @@ static void vpu_free_cmd(struct vpu_cmd_t *cmd) { if (!cmd) return; + if (cmd->last_response_cmd) + atomic_long_set(cmd->last_response_cmd, cmd->key); vfree(cmd->pkt); vfree(cmd); } @@ -174,7 +177,8 @@ static int vpu_request_cmd(struct vpu_inst *inst, u32 id, void *data, return -ENOMEM; mutex_lock(&core->cmd_lock); - cmd->key = core->cmd_seq++; + cmd->key = ++inst->cmd_seq; + cmd->last_response_cmd = &inst->last_response_cmd; if (key) *key = cmd->key; if (sync) @@ -248,26 +252,12 @@ void vpu_clear_request(struct vpu_inst *inst) static bool check_is_responsed(struct vpu_inst *inst, unsigned long key) { - struct vpu_core *core = inst->core; - struct vpu_cmd_t *cmd; - bool flag = true; + unsigned long last_response = atomic_long_read(&inst->last_response_cmd); - mutex_lock(&core->cmd_lock); - cmd = inst->pending; - if (cmd && key == cmd->key) { - flag = false; - goto exit; - } - list_for_each_entry(cmd, &inst->cmd_q, list) { - if (key == cmd->key) { - flag = false; - break; - } - } -exit: - mutex_unlock(&core->cmd_lock); + if (key <= last_response && (last_response - key) < (ULONG_MAX >> 1)) + return true; - return flag; + return false; } static int sync_session_response(struct vpu_inst *inst, unsigned long key, long timeout, int try) diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c index e5c8e1a753cc..e7a18948c4ab 100644 --- a/drivers/media/platform/amphion/vpu_v4l2.c +++ b/drivers/media/platform/amphion/vpu_v4l2.c @@ -631,6 +631,7 @@ int vpu_v4l2_open(struct file *file, struct vpu_inst *inst) func = &vpu->decoder; atomic_set(&inst->ref_count, 0); + atomic_long_set(&inst->last_response_cmd, 0); vpu_inst_get(inst); inst->vpu = vpu; inst->core = vpu_request_core(vpu, inst->type); diff --git a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c index 3071b61946c3..f911d3c7dd86 100644 --- a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c +++ b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c @@ -974,13 +974,13 @@ static void mtk_jpeg_dec_device_run(void *priv) if (ret < 0) goto dec_end; - schedule_delayed_work(&jpeg->job_timeout_work, - msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC)); - mtk_jpeg_set_dec_src(ctx, &src_buf->vb2_buf, &bs); if (mtk_jpeg_set_dec_dst(ctx, &jpeg_src_buf->dec_param, &dst_buf->vb2_buf, &fb)) goto dec_end; + schedule_delayed_work(&jpeg->job_timeout_work, + msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC)); + spin_lock_irqsave(&jpeg->hw_lock, flags); mtk_jpeg_dec_reset(jpeg->reg_base); mtk_jpeg_dec_set_config(jpeg->reg_base, diff --git a/drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c b/drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c index b065ccd06914..378a1cba0144 100644 --- a/drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c +++ b/drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c @@ -26,7 +26,7 @@ static void mtk_mdp_vpu_handle_init_ack(const struct mdp_ipi_comm_ack *msg) vpu->inst_addr = msg->vpu_inst_addr; } -static void mtk_mdp_vpu_ipi_handler(const void *data, unsigned int len, +static void mtk_mdp_vpu_ipi_handler(void *data, unsigned int len, void *priv) { const struct mdp_ipi_comm_ack *msg = data; diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c index cfc7ebed8fb7..1ec29f1b163a 100644 --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c @@ -29,15 +29,7 @@ static int mtk_vcodec_vpu_set_ipi_register(struct mtk_vcodec_fw *fw, int id, mtk_vcodec_ipi_handler handler, const char *name, void *priv) { - /* - * The handler we receive takes a void * as its first argument. We - * cannot change this because it needs to be passed down to the rproc - * subsystem when SCP is used. VPU takes a const argument, which is - * more constrained, so the conversion below is safe. - */ - ipi_handler_t handler_const = (ipi_handler_t)handler; - - return vpu_ipi_register(fw->pdev, id, handler_const, name, priv); + return vpu_ipi_register(fw->pdev, id, handler, name, priv); } static int mtk_vcodec_vpu_ipi_send(struct mtk_vcodec_fw *fw, int id, void *buf, diff --git a/drivers/media/platform/mediatek/vpu/mtk_vpu.c b/drivers/media/platform/mediatek/vpu/mtk_vpu.c index 6beab9e86a22..44adf5cfc9bb 100644 --- a/drivers/media/platform/mediatek/vpu/mtk_vpu.c +++ b/drivers/media/platform/mediatek/vpu/mtk_vpu.c @@ -635,7 +635,7 @@ OUT_LOAD_FW: } EXPORT_SYMBOL_GPL(vpu_load_firmware); -static void vpu_init_ipi_handler(const void *data, unsigned int len, void *priv) +static void vpu_init_ipi_handler(void *data, unsigned int len, void *priv) { struct mtk_vpu *vpu = priv; const struct vpu_run *run = data; diff --git a/drivers/media/platform/mediatek/vpu/mtk_vpu.h b/drivers/media/platform/mediatek/vpu/mtk_vpu.h index a56053ff135a..da05f3e74081 100644 --- a/drivers/media/platform/mediatek/vpu/mtk_vpu.h +++ b/drivers/media/platform/mediatek/vpu/mtk_vpu.h @@ -17,7 +17,7 @@ * VPU interfaces with other blocks by share memory and interrupt. */ -typedef void (*ipi_handler_t) (const void *data, +typedef void (*ipi_handler_t) (void *data, unsigned int len, void *priv); diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c index 61b25fcf826e..9b20cef5afc6 100644 --- a/drivers/media/platform/rockchip/rga/rga.c +++ b/drivers/media/platform/rockchip/rga/rga.c @@ -187,25 +187,16 @@ static int rga_setup_ctrls(struct rga_ctx *ctx) static struct rga_fmt formats[] = { { .fourcc = V4L2_PIX_FMT_ARGB32, - .color_swap = RGA_COLOR_RB_SWAP, + .color_swap = RGA_COLOR_ALPHA_SWAP, .hw_format = RGA_COLOR_FMT_ABGR8888, .depth = 32, .uv_factor = 1, .y_div = 1, .x_div = 1, }, - { - .fourcc = V4L2_PIX_FMT_XRGB32, - .color_swap = RGA_COLOR_RB_SWAP, - .hw_format = RGA_COLOR_FMT_XBGR8888, - .depth = 32, - .uv_factor = 1, - .y_div = 1, - .x_div = 1, - }, { .fourcc = V4L2_PIX_FMT_ABGR32, - .color_swap = RGA_COLOR_ALPHA_SWAP, + .color_swap = RGA_COLOR_RB_SWAP, .hw_format = RGA_COLOR_FMT_ABGR8888, .depth = 32, .uv_factor = 1, @@ -214,7 +205,7 @@ static struct rga_fmt formats[] = { }, { .fourcc = V4L2_PIX_FMT_XBGR32, - .color_swap = RGA_COLOR_ALPHA_SWAP, + .color_swap = RGA_COLOR_RB_SWAP, .hw_format = RGA_COLOR_FMT_XBGR8888, .depth = 32, .uv_factor = 1, diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c index d4540684ea9a..0bcb9db5ad19 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c @@ -701,6 +701,9 @@ irqreturn_t rkisp1_capture_isr(int irq, void *ctx) unsigned int i; u32 status; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_MI_MIS); if (!status) return IRQ_NONE; diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h index a1293c45aae1..5776292f914a 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h @@ -61,6 +61,14 @@ struct dentry; RKISP1_CIF_ISP_EXP_END | \ RKISP1_CIF_ISP_HIST_MEASURE_RDY) +/* IRQ lines */ +enum rkisp1_irq_line { + RKISP1_IRQ_ISP = 0, + RKISP1_IRQ_MI, + RKISP1_IRQ_MIPI, + RKISP1_NUM_IRQS, +}; + /* enum for the resizer pads */ enum rkisp1_rsz_pad { RKISP1_RSZ_PAD_SINK, @@ -441,7 +449,6 @@ struct rkisp1_debug { * struct rkisp1_device - ISP platform device * * @base_addr: base register address - * @irq: the irq number * @dev: a pointer to the struct device * @clk_size: number of clocks * @clks: array of clocks @@ -459,6 +466,8 @@ struct rkisp1_debug { * @stream_lock: serializes {start/stop}_streaming callbacks between the capture devices. * @debug: debug params to be exposed on debugfs * @info: version-specific ISP information + * @irqs: IRQ line numbers + * @irqs_enabled: the hardware is enabled and can cause interrupts */ struct rkisp1_device { void __iomem *base_addr; @@ -479,6 +488,8 @@ struct rkisp1_device { struct mutex stream_lock; /* serialize {start/stop}_streaming cb between capture devices */ struct rkisp1_debug debug; const struct rkisp1_info *info; + int irqs[RKISP1_NUM_IRQS]; + bool irqs_enabled; }; /* diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c index d7acc94e10f8..95b6e41c48ec 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c @@ -141,8 +141,20 @@ static void rkisp1_csi_disable(struct rkisp1_csi *csi) struct rkisp1_device *rkisp1 = csi->rkisp1; u32 val; - /* Mask and clear interrupts. */ + /* Mask MIPI interrupts. */ rkisp1_write(rkisp1, RKISP1_CIF_MIPI_IMSC, 0); + + /* Flush posted writes */ + rkisp1_read(rkisp1, RKISP1_CIF_MIPI_IMSC); + + /* + * Wait until the IRQ handler has ended. The IRQ handler may get called + * even after this, but it will return immediately as the MIPI + * interrupts have been masked. + */ + synchronize_irq(rkisp1->irqs[RKISP1_IRQ_MIPI]); + + /* Clear MIPI interrupt status */ rkisp1_write(rkisp1, RKISP1_CIF_MIPI_ICR, ~0); val = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_CTRL); @@ -199,6 +211,9 @@ irqreturn_t rkisp1_csi_isr(int irq, void *ctx) struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); u32 val, status; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_MIS); if (!status) return IRQ_NONE; diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c index 2b76339f9381..7a3b69ba51b9 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c @@ -114,6 +114,7 @@ struct rkisp1_isr_data { const char *name; irqreturn_t (*isr)(int irq, void *ctx); + u32 line_mask; }; /* ---------------------------------------------------------------------------- @@ -304,6 +305,24 @@ static int __maybe_unused rkisp1_runtime_suspend(struct device *dev) { struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); + rkisp1->irqs_enabled = false; + /* Make sure the IRQ handler will see the above */ + mb(); + + /* + * Wait until any running IRQ handler has returned. The IRQ handler + * may get called even after this (as it's a shared interrupt line) + * but the 'irqs_enabled' flag will make the handler return immediately. + */ + for (unsigned int il = 0; il < ARRAY_SIZE(rkisp1->irqs); ++il) { + if (rkisp1->irqs[il] == -1) + continue; + + /* Skip if the irq line is the same as previous */ + if (il == 0 || rkisp1->irqs[il - 1] != rkisp1->irqs[il]) + synchronize_irq(rkisp1->irqs[il]); + } + clk_bulk_disable_unprepare(rkisp1->clk_size, rkisp1->clks); return pinctrl_pm_select_sleep_state(dev); } @@ -320,6 +339,10 @@ static int __maybe_unused rkisp1_runtime_resume(struct device *dev) if (ret) return ret; + rkisp1->irqs_enabled = true; + /* Make sure the IRQ handler will see the above */ + mb(); + return 0; } @@ -442,17 +465,25 @@ error: static irqreturn_t rkisp1_isr(int irq, void *ctx) { + irqreturn_t ret = IRQ_NONE; + /* * Call rkisp1_capture_isr() first to handle the frame that * potentially completed using the current frame_sequence number before * it is potentially incremented by rkisp1_isp_isr() in the vertical * sync. */ - rkisp1_capture_isr(irq, ctx); - rkisp1_isp_isr(irq, ctx); - rkisp1_csi_isr(irq, ctx); - return IRQ_HANDLED; + if (rkisp1_capture_isr(irq, ctx) == IRQ_HANDLED) + ret = IRQ_HANDLED; + + if (rkisp1_isp_isr(irq, ctx) == IRQ_HANDLED) + ret = IRQ_HANDLED; + + if (rkisp1_csi_isr(irq, ctx) == IRQ_HANDLED) + ret = IRQ_HANDLED; + + return ret; } static const char * const px30_isp_clks[] = { @@ -463,9 +494,9 @@ static const char * const px30_isp_clks[] = { }; static const struct rkisp1_isr_data px30_isp_isrs[] = { - { "isp", rkisp1_isp_isr }, - { "mi", rkisp1_capture_isr }, - { "mipi", rkisp1_csi_isr }, + { "isp", rkisp1_isp_isr, BIT(RKISP1_IRQ_ISP) }, + { "mi", rkisp1_capture_isr, BIT(RKISP1_IRQ_MI) }, + { "mipi", rkisp1_csi_isr, BIT(RKISP1_IRQ_MIPI) }, }; static const struct rkisp1_info px30_isp_info = { @@ -484,7 +515,7 @@ static const char * const rk3399_isp_clks[] = { }; static const struct rkisp1_isr_data rk3399_isp_isrs[] = { - { NULL, rkisp1_isr }, + { NULL, rkisp1_isr, BIT(RKISP1_IRQ_ISP) | BIT(RKISP1_IRQ_MI) | BIT(RKISP1_IRQ_MIPI) }, }; static const struct rkisp1_info rk3399_isp_info = { @@ -535,6 +566,9 @@ static int rkisp1_probe(struct platform_device *pdev) if (IS_ERR(rkisp1->base_addr)) return PTR_ERR(rkisp1->base_addr); + for (unsigned int il = 0; il < ARRAY_SIZE(rkisp1->irqs); ++il) + rkisp1->irqs[il] = -1; + for (i = 0; i < info->isr_size; i++) { irq = info->isrs[i].name ? platform_get_irq_byname(pdev, info->isrs[i].name) @@ -542,6 +576,11 @@ static int rkisp1_probe(struct platform_device *pdev) if (irq < 0) return irq; + for (unsigned int il = 0; il < ARRAY_SIZE(rkisp1->irqs); ++il) { + if (info->isrs[i].line_mask & BIT(il)) + rkisp1->irqs[il] = irq; + } + ret = devm_request_irq(dev, irq, info->isrs[i].isr, IRQF_SHARED, dev_driver_string(dev), dev); if (ret) { diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c index 585cf3f53469..2af5c1a48070 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c @@ -281,11 +281,25 @@ static void rkisp1_isp_stop(struct rkisp1_isp *isp) * ISP(mi) stop in mi frame end -> Stop ISP(mipi) -> * Stop ISP(isp) ->wait for ISP isp off */ - /* stop and clear MI and ISP interrupts */ - rkisp1_write(rkisp1, RKISP1_CIF_ISP_IMSC, 0); - rkisp1_write(rkisp1, RKISP1_CIF_ISP_ICR, ~0); + /* Mask MI and ISP interrupts */ + rkisp1_write(rkisp1, RKISP1_CIF_ISP_IMSC, 0); rkisp1_write(rkisp1, RKISP1_CIF_MI_IMSC, 0); + + /* Flush posted writes */ + rkisp1_read(rkisp1, RKISP1_CIF_MI_IMSC); + + /* + * Wait until the IRQ handler has ended. The IRQ handler may get called + * even after this, but it will return immediately as the MI and ISP + * interrupts have been masked. + */ + synchronize_irq(rkisp1->irqs[RKISP1_IRQ_ISP]); + if (rkisp1->irqs[RKISP1_IRQ_ISP] != rkisp1->irqs[RKISP1_IRQ_MI]) + synchronize_irq(rkisp1->irqs[RKISP1_IRQ_MI]); + + /* Clear MI and ISP interrupt status */ + rkisp1_write(rkisp1, RKISP1_CIF_ISP_ICR, ~0); rkisp1_write(rkisp1, RKISP1_CIF_MI_ICR, ~0); /* stop ISP */ @@ -1009,6 +1023,9 @@ irqreturn_t rkisp1_isp_isr(int irq, void *ctx) struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); u32 status, isp_err; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_ISP_MIS); if (!status) return IRQ_NONE; diff --git a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c index aa65d70b6270..7a2f558c981d 100644 --- a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c +++ b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c @@ -66,6 +66,7 @@ static void deinterlace_device_run(void *priv) struct vb2_v4l2_buffer *src, *dst; unsigned int hstep, vstep; dma_addr_t addr; + int i; src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); @@ -160,6 +161,26 @@ static void deinterlace_device_run(void *priv) deinterlace_write(dev, DEINTERLACE_CH1_HORZ_FACT, hstep); deinterlace_write(dev, DEINTERLACE_CH1_VERT_FACT, vstep); + /* neutral filter coefficients */ + deinterlace_set_bits(dev, DEINTERLACE_FRM_CTRL, + DEINTERLACE_FRM_CTRL_COEF_ACCESS); + readl_poll_timeout(dev->base + DEINTERLACE_STATUS, val, + val & DEINTERLACE_STATUS_COEF_STATUS, 2, 40); + + for (i = 0; i < 32; i++) { + deinterlace_write(dev, DEINTERLACE_CH0_HORZ_COEF0 + i * 4, + DEINTERLACE_IDENTITY_COEF); + deinterlace_write(dev, DEINTERLACE_CH0_VERT_COEF + i * 4, + DEINTERLACE_IDENTITY_COEF); + deinterlace_write(dev, DEINTERLACE_CH1_HORZ_COEF0 + i * 4, + DEINTERLACE_IDENTITY_COEF); + deinterlace_write(dev, DEINTERLACE_CH1_VERT_COEF + i * 4, + DEINTERLACE_IDENTITY_COEF); + } + + deinterlace_clr_set_bits(dev, DEINTERLACE_FRM_CTRL, + DEINTERLACE_FRM_CTRL_COEF_ACCESS, 0); + deinterlace_clr_set_bits(dev, DEINTERLACE_FIELD_CTRL, DEINTERLACE_FIELD_CTRL_FIELD_CNT_MSK, DEINTERLACE_FIELD_CTRL_FIELD_CNT(ctx->field)); @@ -248,7 +269,6 @@ static irqreturn_t deinterlace_irq(int irq, void *data) static void deinterlace_init(struct deinterlace_dev *dev) { u32 val; - int i; deinterlace_write(dev, DEINTERLACE_BYPASS, DEINTERLACE_BYPASS_CSC); @@ -284,27 +304,7 @@ static void deinterlace_init(struct deinterlace_dev *dev) deinterlace_clr_set_bits(dev, DEINTERLACE_CHROMA_DIFF, DEINTERLACE_CHROMA_DIFF_TH_MSK, - DEINTERLACE_CHROMA_DIFF_TH(5)); - - /* neutral filter coefficients */ - deinterlace_set_bits(dev, DEINTERLACE_FRM_CTRL, - DEINTERLACE_FRM_CTRL_COEF_ACCESS); - readl_poll_timeout(dev->base + DEINTERLACE_STATUS, val, - val & DEINTERLACE_STATUS_COEF_STATUS, 2, 40); - - for (i = 0; i < 32; i++) { - deinterlace_write(dev, DEINTERLACE_CH0_HORZ_COEF0 + i * 4, - DEINTERLACE_IDENTITY_COEF); - deinterlace_write(dev, DEINTERLACE_CH0_VERT_COEF + i * 4, - DEINTERLACE_IDENTITY_COEF); - deinterlace_write(dev, DEINTERLACE_CH1_HORZ_COEF0 + i * 4, - DEINTERLACE_IDENTITY_COEF); - deinterlace_write(dev, DEINTERLACE_CH1_VERT_COEF + i * 4, - DEINTERLACE_IDENTITY_COEF); - } - - deinterlace_clr_set_bits(dev, DEINTERLACE_FRM_CTRL, - DEINTERLACE_FRM_CTRL_COEF_ACCESS, 0); + DEINTERLACE_CHROMA_DIFF_TH(31)); } static inline struct deinterlace_ctx *deinterlace_file2ctx(struct file *file) @@ -931,11 +931,18 @@ static int deinterlace_runtime_resume(struct device *device) return ret; } + ret = reset_control_deassert(dev->rstc); + if (ret) { + dev_err(dev->dev, "Failed to apply reset\n"); + + goto err_exclusive_rate; + } + ret = clk_prepare_enable(dev->bus_clk); if (ret) { dev_err(dev->dev, "Failed to enable bus clock\n"); - goto err_exclusive_rate; + goto err_rst; } ret = clk_prepare_enable(dev->mod_clk); @@ -952,23 +959,16 @@ static int deinterlace_runtime_resume(struct device *device) goto err_mod_clk; } - ret = reset_control_deassert(dev->rstc); - if (ret) { - dev_err(dev->dev, "Failed to apply reset\n"); - - goto err_ram_clk; - } - deinterlace_init(dev); return 0; -err_ram_clk: - clk_disable_unprepare(dev->ram_clk); err_mod_clk: clk_disable_unprepare(dev->mod_clk); err_bus_clk: clk_disable_unprepare(dev->bus_clk); +err_rst: + reset_control_assert(dev->rstc); err_exclusive_rate: clk_rate_exclusive_put(dev->mod_clk); @@ -979,11 +979,12 @@ static int deinterlace_runtime_suspend(struct device *device) { struct deinterlace_dev *dev = dev_get_drvdata(device); - reset_control_assert(dev->rstc); - clk_disable_unprepare(dev->ram_clk); clk_disable_unprepare(dev->mod_clk); clk_disable_unprepare(dev->bus_clk); + + reset_control_assert(dev->rstc); + clk_rate_exclusive_put(dev->mod_clk); return 0; diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index fe17c7f98e81..52d82cbe7685 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -253,7 +253,7 @@ int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (attr->attach_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); @@ -278,7 +278,7 @@ int lirc_prog_detach(const union bpf_attr *attr) if (IS_ERR(prog)) return PTR_ERR(prog); - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) { bpf_prog_put(prog); return PTR_ERR(rcdev); @@ -303,7 +303,7 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) if (attr->query.query_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->query.target_fd); + rcdev = rc_dev_get_from_fd(attr->query.target_fd, false); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c index 196806709259..69e630d85262 100644 --- a/drivers/media/rc/ir_toy.c +++ b/drivers/media/rc/ir_toy.c @@ -332,6 +332,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) sizeof(COMMAND_SMODE_EXIT), STATE_COMMAND_NO_RESP); if (err) { dev_err(irtoy->dev, "exit sample mode: %d\n", err); + kfree(buf); return err; } @@ -339,6 +340,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND); if (err) { dev_err(irtoy->dev, "enter sample mode: %d\n", err); + kfree(buf); return err; } diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 184e0b35744f..adb8c794a2d7 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -814,7 +814,7 @@ void __exit lirc_dev_exit(void) unregister_chrdev_region(lirc_base_dev, RC_DEV_MAX); } -struct rc_dev *rc_dev_get_from_fd(int fd) +struct rc_dev *rc_dev_get_from_fd(int fd, bool write) { struct fd f = fdget(fd); struct lirc_fh *fh; @@ -828,6 +828,9 @@ struct rc_dev *rc_dev_get_from_fd(int fd) return ERR_PTR(-EINVAL); } + if (write && !(f.file->f_mode & FMODE_WRITE)) + return ERR_PTR(-EPERM); + fh = f.file->private_data; dev = fh->rc; diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h index ef1e95e1af7f..7df949fc65e2 100644 --- a/drivers/media/rc/rc-core-priv.h +++ b/drivers/media/rc/rc-core-priv.h @@ -325,7 +325,7 @@ void lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev); void lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc); int lirc_register(struct rc_dev *dev); void lirc_unregister(struct rc_dev *dev); -struct rc_dev *rc_dev_get_from_fd(int fd); +struct rc_dev *rc_dev_get_from_fd(int fd, bool write); #else static inline int lirc_dev_init(void) { return 0; } static inline void lirc_dev_exit(void) {} diff --git a/drivers/media/tuners/xc4000.c b/drivers/media/tuners/xc4000.c index 57ded9ff3f04..29bc63021c5a 100644 --- a/drivers/media/tuners/xc4000.c +++ b/drivers/media/tuners/xc4000.c @@ -1515,10 +1515,10 @@ static int xc4000_get_frequency(struct dvb_frontend *fe, u32 *freq) { struct xc4000_priv *priv = fe->tuner_priv; + mutex_lock(&priv->lock); *freq = priv->freq_hz + priv->freq_offset; if (debug) { - mutex_lock(&priv->lock); if ((priv->cur_fw.type & (BASE | FM | DTV6 | DTV7 | DTV78 | DTV8)) == BASE) { u16 snr = 0; @@ -1529,8 +1529,8 @@ static int xc4000_get_frequency(struct dvb_frontend *fe, u32 *freq) return 0; } } - mutex_unlock(&priv->lock); } + mutex_unlock(&priv->lock); dprintk(1, "%s()\n", __func__); diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c index 4d037c92af7c..bae76023cf71 100644 --- a/drivers/media/usb/em28xx/em28xx-cards.c +++ b/drivers/media/usb/em28xx/em28xx-cards.c @@ -4094,6 +4094,10 @@ static int em28xx_usb_probe(struct usb_interface *intf, * topology will likely change after the load of the em28xx subdrivers. */ #ifdef CONFIG_MEDIA_CONTROLLER + /* + * No need to check the return value, the device will still be + * usable without media controller API. + */ retval = media_device_register(dev->media_dev); #endif diff --git a/drivers/media/usb/go7007/go7007-driver.c b/drivers/media/usb/go7007/go7007-driver.c index 0c24e2984304..eb03f98b2ef1 100644 --- a/drivers/media/usb/go7007/go7007-driver.c +++ b/drivers/media/usb/go7007/go7007-driver.c @@ -80,7 +80,7 @@ static int go7007_load_encoder(struct go7007 *go) const struct firmware *fw_entry; char fw_name[] = "go7007/go7007fw.bin"; void *bounce; - int fw_len, rv = 0; + int fw_len; u16 intr_val, intr_data; if (go->boot_fw == NULL) { @@ -109,9 +109,11 @@ static int go7007_load_encoder(struct go7007 *go) go7007_read_interrupt(go, &intr_val, &intr_data) < 0 || (intr_val & ~0x1) != 0x5a5a) { v4l2_err(go, "error transferring firmware\n"); - rv = -1; + kfree(go->boot_fw); + go->boot_fw = NULL; + return -1; } - return rv; + return 0; } MODULE_FIRMWARE("go7007/go7007fw.bin"); diff --git a/drivers/media/usb/go7007/go7007-usb.c b/drivers/media/usb/go7007/go7007-usb.c index eeb85981e02b..762c13e49bfa 100644 --- a/drivers/media/usb/go7007/go7007-usb.c +++ b/drivers/media/usb/go7007/go7007-usb.c @@ -1201,7 +1201,9 @@ static int go7007_usb_probe(struct usb_interface *intf, u16 channel; /* read channel number from GPIO[1:0] */ - go7007_read_addr(go, 0x3c81, &channel); + if (go7007_read_addr(go, 0x3c81, &channel)) + goto allocfail; + channel &= 0x3; go->board_id = GO7007_BOARDID_ADLINK_MPG24; usb->board = board = &board_adlink_mpg24; diff --git a/drivers/media/usb/pvrusb2/pvrusb2-context.c b/drivers/media/usb/pvrusb2/pvrusb2-context.c index 1764674de98b..73c95ba2328a 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-context.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-context.c @@ -90,8 +90,10 @@ static void pvr2_context_destroy(struct pvr2_context *mp) } -static void pvr2_context_notify(struct pvr2_context *mp) +static void pvr2_context_notify(void *ptr) { + struct pvr2_context *mp = ptr; + pvr2_context_set_notify(mp,!0); } @@ -106,9 +108,7 @@ static void pvr2_context_check(struct pvr2_context *mp) pvr2_trace(PVR2_TRACE_CTXT, "pvr2_context %p (initialize)", mp); /* Finish hardware initialization */ - if (pvr2_hdw_initialize(mp->hdw, - (void (*)(void *))pvr2_context_notify, - mp)) { + if (pvr2_hdw_initialize(mp->hdw, pvr2_context_notify, mp)) { mp->video_stream.stream = pvr2_hdw_get_video_stream(mp->hdw); /* Trigger interface initialization. By doing this @@ -267,9 +267,9 @@ static void pvr2_context_exit(struct pvr2_context *mp) void pvr2_context_disconnect(struct pvr2_context *mp) { pvr2_hdw_disconnect(mp->hdw); - mp->disconnect_flag = !0; if (!pvr2_context_shutok()) pvr2_context_notify(mp); + mp->disconnect_flag = !0; } diff --git a/drivers/media/usb/pvrusb2/pvrusb2-dvb.c b/drivers/media/usb/pvrusb2/pvrusb2-dvb.c index 26811efe0fb5..9a9bae21c614 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-dvb.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-dvb.c @@ -88,8 +88,10 @@ static int pvr2_dvb_feed_thread(void *data) return stat; } -static void pvr2_dvb_notify(struct pvr2_dvb_adapter *adap) +static void pvr2_dvb_notify(void *ptr) { + struct pvr2_dvb_adapter *adap = ptr; + wake_up(&adap->buffer_wait_data); } @@ -149,7 +151,7 @@ static int pvr2_dvb_stream_do_start(struct pvr2_dvb_adapter *adap) } pvr2_stream_set_callback(pvr->video_stream.stream, - (pvr2_stream_callback) pvr2_dvb_notify, adap); + pvr2_dvb_notify, adap); ret = pvr2_stream_set_buffer_count(stream, PVR2_DVB_BUFFER_COUNT); if (ret < 0) return ret; diff --git a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c index c04ab7258d64..d608b793fa84 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c @@ -1033,8 +1033,10 @@ static int pvr2_v4l2_open(struct file *file) } -static void pvr2_v4l2_notify(struct pvr2_v4l2_fh *fhp) +static void pvr2_v4l2_notify(void *ptr) { + struct pvr2_v4l2_fh *fhp = ptr; + wake_up(&fhp->wait_data); } @@ -1067,7 +1069,7 @@ static int pvr2_v4l2_iosetup(struct pvr2_v4l2_fh *fh) hdw = fh->channel.mc_head->hdw; sp = fh->pdi->stream->stream; - pvr2_stream_set_callback(sp,(pvr2_stream_callback)pvr2_v4l2_notify,fh); + pvr2_stream_set_callback(sp, pvr2_v4l2_notify, fh); pvr2_hdw_set_stream_type(hdw,fh->pdi->config); if ((ret = pvr2_hdw_set_streaming(hdw,!0)) < 0) return ret; return pvr2_ioread_set_enabled(fh->rhp,!0); @@ -1198,11 +1200,6 @@ static void pvr2_v4l2_dev_init(struct pvr2_v4l2_dev *dip, dip->minor_type = pvr2_v4l_type_video; nr_ptr = video_nr; caps |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_AUDIO; - if (!dip->stream) { - pr_err(KBUILD_MODNAME - ": Failed to set up pvrusb2 v4l video dev due to missing stream instance\n"); - return; - } break; case VFL_TYPE_VBI: dip->config = pvr2_config_vbi; diff --git a/drivers/media/usb/stk1160/stk1160-video.c b/drivers/media/usb/stk1160/stk1160-video.c index 4e966f6bf608..366f0e4a5dc0 100644 --- a/drivers/media/usb/stk1160/stk1160-video.c +++ b/drivers/media/usb/stk1160/stk1160-video.c @@ -107,8 +107,7 @@ void stk1160_copy_video(struct stk1160 *dev, u8 *src, int len) /* * TODO: These stk1160_dbg are very spammy! - * We should 1) check why we are getting them - * and 2) add ratelimit. + * We should check why we are getting them. * * UPDATE: One of the reasons (the only one?) for getting these * is incorrect standard (mismatch between expected and configured). @@ -151,7 +150,7 @@ void stk1160_copy_video(struct stk1160 *dev, u8 *src, int len) /* Let the bug hunt begin! sanity checks! */ if (lencopy < 0) { - stk1160_dbg("copy skipped: negative lencopy\n"); + printk_ratelimited(KERN_DEBUG "copy skipped: negative lencopy\n"); return; } diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c index be7fde1ed3ea..97645d6509e1 100644 --- a/drivers/media/v4l2-core/v4l2-mem2mem.c +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c @@ -1084,11 +1084,17 @@ static int v4l2_m2m_register_entity(struct media_device *mdev, entity->function = function; ret = media_entity_pads_init(entity, num_pads, pads); - if (ret) + if (ret) { + kfree(entity->name); + entity->name = NULL; return ret; + } ret = media_device_register_entity(mdev, entity); - if (ret) + if (ret) { + kfree(entity->name); + entity->name = NULL; return ret; + } return 0; } diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 7c0bcd83e21e..4fe9467404bd 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1537,6 +1537,7 @@ config MFD_DAVINCI_VOICECODEC config MFD_TI_AM335X_TSCADC tristate "TI ADC / Touch Screen chip support" + depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST select MFD_CORE select REGMAP select REGMAP_MMIO diff --git a/drivers/mfd/altera-sysmgr.c b/drivers/mfd/altera-sysmgr.c index 5d3715a28b28..dbe100994371 100644 --- a/drivers/mfd/altera-sysmgr.c +++ b/drivers/mfd/altera-sysmgr.c @@ -110,7 +110,9 @@ struct regmap *altr_sysmgr_regmap_lookup_by_phandle(struct device_node *np, dev = driver_find_device_by_of_node(&altr_sysmgr_driver.driver, (void *)sysmgr_np); - of_node_put(sysmgr_np); + if (property) + of_node_put(sysmgr_np); + if (!dev) return ERR_PTR(-EPROBE_DEFER); diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c index 6196724ef39b..ecfe15122091 100644 --- a/drivers/mfd/syscon.c +++ b/drivers/mfd/syscon.c @@ -223,7 +223,9 @@ struct regmap *syscon_regmap_lookup_by_phandle(struct device_node *np, return ERR_PTR(-ENODEV); regmap = syscon_node_to_regmap(syscon_np); - of_node_put(syscon_np); + + if (property) + of_node_put(syscon_np); return regmap; } diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index cc57cc820432..69cc24962706 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1990,7 +1990,7 @@ static int fastrpc_cb_remove(struct platform_device *pdev) int i; spin_lock_irqsave(&cctx->lock, flags); - for (i = 1; i < FASTRPC_MAX_SESSIONS; i++) { + for (i = 0; i < FASTRPC_MAX_SESSIONS; i++) { if (cctx->session[i].sid == sess->sid) { cctx->session[i].valid = false; cctx->sesscount--; diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c index d7daa01fe7ca..63c717053e36 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c +++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c @@ -151,6 +151,7 @@ static int lis3lv02d_i2c_probe(struct i2c_client *client, lis3_dev.init = lis3_i2c_init; lis3_dev.read = lis3_i2c_read; lis3_dev.write = lis3_i2c_write; + lis3_dev.reg_ctrl = lis3_reg_ctrl; lis3_dev.irq = client->irq; lis3_dev.ac = lis3lv02d_axis_map; lis3_dev.pm_dev = &client->dev; @@ -198,8 +199,14 @@ static int lis3lv02d_i2c_suspend(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct lis3lv02d *lis3 = i2c_get_clientdata(client); - if (!lis3->pdata || !lis3->pdata->wakeup_flags) + /* Turn on for wakeup if turned off by runtime suspend */ + if (lis3->pdata && lis3->pdata->wakeup_flags) { + if (pm_runtime_suspended(dev)) + lis3lv02d_poweron(lis3); + /* For non wakeup turn off if not already turned off by runtime suspend */ + } else if (!pm_runtime_suspended(dev)) lis3lv02d_poweroff(lis3); + return 0; } @@ -208,13 +215,12 @@ static int lis3lv02d_i2c_resume(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct lis3lv02d *lis3 = i2c_get_clientdata(client); - /* - * pm_runtime documentation says that devices should always - * be powered on at resume. Pm_runtime turns them off after system - * wide resume is complete. - */ - if (!lis3->pdata || !lis3->pdata->wakeup_flags || - pm_runtime_suspended(dev)) + /* Turn back off if turned on for wakeup and runtime suspended*/ + if (lis3->pdata && lis3->pdata->wakeup_flags) { + if (pm_runtime_suspended(dev)) + lis3lv02d_poweroff(lis3); + /* For non wakeup turn back on if not runtime suspended */ + } else if (!pm_runtime_suspended(dev)) lis3lv02d_poweron(lis3); return 0; diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index bdc65d50b945..3390ff511103 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -112,6 +112,8 @@ #define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */ #define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */ +#define MEI_DEV_ID_ARL_S 0x7F68 /* Arrow Lake Point S */ +#define MEI_DEV_ID_ARL_H 0x7770 /* Arrow Lake Point H */ /* * MEI HW Section diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 5bf0d50d55a0..f8219cbd2c7c 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -119,6 +119,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_H, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c index c61be3404c6f..504b836a7abf 100644 --- a/drivers/misc/open-dice.c +++ b/drivers/misc/open-dice.c @@ -142,7 +142,6 @@ static int __init open_dice_probe(struct platform_device *pdev) return -ENOMEM; *drvdata = (struct open_dice_drvdata){ - .lock = __MUTEX_INITIALIZER(drvdata->lock), .rmem = rmem, .misc = (struct miscdevice){ .parent = dev, @@ -152,6 +151,7 @@ static int __init open_dice_probe(struct platform_device *pdev) .mode = 0600, }, }; + mutex_init(&drvdata->lock); /* Index overflow check not needed, misc_register() will fail. */ snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 44a126eb19da..54d6cc5e9083 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -402,6 +402,10 @@ struct mmc_blk_ioc_data { struct mmc_ioc_cmd ic; unsigned char *buf; u64 buf_bytes; + unsigned int flags; +#define MMC_BLK_IOC_DROP BIT(0) /* drop this mrq */ +#define MMC_BLK_IOC_SBC BIT(1) /* use mrq.sbc */ + struct mmc_rpmb_data *rpmb; }; @@ -411,7 +415,7 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user( struct mmc_blk_ioc_data *idata; int err; - idata = kmalloc(sizeof(*idata), GFP_KERNEL); + idata = kzalloc(sizeof(*idata), GFP_KERNEL); if (!idata) { err = -ENOMEM; goto out; @@ -467,7 +471,7 @@ static int mmc_blk_ioctl_copy_to_user(struct mmc_ioc_cmd __user *ic_ptr, } static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, - struct mmc_blk_ioc_data *idata) + struct mmc_blk_ioc_data **idatas, int i) { struct mmc_command cmd = {}, sbc = {}; struct mmc_data data = {}; @@ -477,10 +481,18 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, unsigned int busy_timeout_ms; int err; unsigned int target_part; + struct mmc_blk_ioc_data *idata = idatas[i]; + struct mmc_blk_ioc_data *prev_idata = NULL; if (!card || !md || !idata) return -EINVAL; + if (idata->flags & MMC_BLK_IOC_DROP) + return 0; + + if (idata->flags & MMC_BLK_IOC_SBC && i > 0) + prev_idata = idatas[i - 1]; + /* * The RPMB accesses comes in from the character device, so we * need to target these explicitly. Else we just target the @@ -547,7 +559,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, return err; } - if (idata->rpmb) { + if (idata->rpmb || prev_idata) { sbc.opcode = MMC_SET_BLOCK_COUNT; /* * We don't do any blockcount validation because the max size @@ -555,6 +567,8 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, * 'Reliable Write' bit here. */ sbc.arg = data.blocks | (idata->ic.write_flag & BIT(31)); + if (prev_idata) + sbc.arg = prev_idata->ic.arg; sbc.flags = MMC_RSP_R1 | MMC_CMD_AC; mrq.sbc = &sbc; } @@ -572,6 +586,15 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, mmc_wait_for_req(card->host, &mrq); memcpy(&idata->ic.response, cmd.resp, sizeof(cmd.resp)); + if (prev_idata) { + memcpy(&prev_idata->ic.response, sbc.resp, sizeof(sbc.resp)); + if (sbc.error) { + dev_err(mmc_dev(card->host), "%s: sbc error %d\n", + __func__, sbc.error); + return sbc.error; + } + } + if (cmd.error) { dev_err(mmc_dev(card->host), "%s: cmd error %d\n", __func__, cmd.error); @@ -866,10 +889,11 @@ static const struct block_device_operations mmc_bdops = { static int mmc_blk_part_switch_pre(struct mmc_card *card, unsigned int part_type) { - const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_RPMB; + const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_MASK; + const unsigned int rpmb = EXT_CSD_PART_CONFIG_ACC_RPMB; int ret = 0; - if ((part_type & mask) == mask) { + if ((part_type & mask) == rpmb) { if (card->ext_csd.cmdq_en) { ret = mmc_cmdq_disable(card); if (ret) @@ -884,10 +908,11 @@ static int mmc_blk_part_switch_pre(struct mmc_card *card, static int mmc_blk_part_switch_post(struct mmc_card *card, unsigned int part_type) { - const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_RPMB; + const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_MASK; + const unsigned int rpmb = EXT_CSD_PART_CONFIG_ACC_RPMB; int ret = 0; - if ((part_type & mask) == mask) { + if ((part_type & mask) == rpmb) { mmc_retune_unpause(card->host); if (card->reenable_cmdq && !card->ext_csd.cmdq_en) ret = mmc_cmdq_enable(card); @@ -1057,6 +1082,20 @@ static inline void mmc_blk_reset_success(struct mmc_blk_data *md, int type) md->reset_done &= ~type; } +static void mmc_blk_check_sbc(struct mmc_queue_req *mq_rq) +{ + struct mmc_blk_ioc_data **idata = mq_rq->drv_op_data; + int i; + + for (i = 1; i < mq_rq->ioc_count; i++) { + if (idata[i - 1]->ic.opcode == MMC_SET_BLOCK_COUNT && + mmc_op_multi(idata[i]->ic.opcode)) { + idata[i - 1]->flags |= MMC_BLK_IOC_DROP; + idata[i]->flags |= MMC_BLK_IOC_SBC; + } + } +} + /* * The non-block commands come back from the block layer after it queued it and * processed it with all other requests and then they get issued in this @@ -1084,11 +1123,14 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req) if (ret) break; } + + mmc_blk_check_sbc(mq_rq); + fallthrough; case MMC_DRV_OP_IOCTL_RPMB: idata = mq_rq->drv_op_data; for (i = 0, ret = 0; i < mq_rq->ioc_count; i++) { - ret = __mmc_blk_ioctl_cmd(card, md, idata[i]); + ret = __mmc_blk_ioctl_cmd(card, md, idata, i); if (ret) break; } diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index f2723c2d8821..be586b18cfe8 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1087,10 +1087,12 @@ static int mmc_select_bus_width(struct mmc_card *card) static unsigned ext_csd_bits[] = { EXT_CSD_BUS_WIDTH_8, EXT_CSD_BUS_WIDTH_4, + EXT_CSD_BUS_WIDTH_1, }; static unsigned bus_widths[] = { MMC_BUS_WIDTH_8, MMC_BUS_WIDTH_4, + MMC_BUS_WIDTH_1, }; struct mmc_host *host = card->host; unsigned idx, bus_width = 0; diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index dd2a4b6ab6ad..e3c69c6b85a6 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -62,11 +62,15 @@ int mmc_gpio_alloc(struct mmc_host *host) int mmc_gpio_get_ro(struct mmc_host *host) { struct mmc_gpio *ctx = host->slot.handler_priv; + int cansleep; if (!ctx || !ctx->ro_gpio) return -ENOSYS; - return gpiod_get_value_cansleep(ctx->ro_gpio); + cansleep = gpiod_cansleep(ctx->ro_gpio); + return cansleep ? + gpiod_get_value_cansleep(ctx->ro_gpio) : + gpiod_get_value(ctx->ro_gpio); } EXPORT_SYMBOL(mmc_gpio_get_ro); diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index cc333ad67cac..2a99ffb61f8c 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -119,19 +119,14 @@ struct mmc_spi_host { struct spi_transfer status; struct spi_message readback; - /* underlying DMA-aware controller, or null */ - struct device *dma_dev; - /* buffer used for commands and for message "overhead" */ struct scratch *data; - dma_addr_t data_dma; /* Specs say to write ones most of the time, even when the card * has no need to read its input data; and many cards won't care. * This is our source of those ones. */ void *ones; - dma_addr_t ones_dma; }; @@ -147,11 +142,8 @@ static inline int mmc_cs_off(struct mmc_spi_host *host) return spi_setup(host->spi); } -static int -mmc_spi_readbytes(struct mmc_spi_host *host, unsigned len) +static int mmc_spi_readbytes(struct mmc_spi_host *host, unsigned int len) { - int status; - if (len > sizeof(*host->data)) { WARN_ON(1); return -EIO; @@ -159,19 +151,7 @@ mmc_spi_readbytes(struct mmc_spi_host *host, unsigned len) host->status.len = len; - if (host->dma_dev) - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*host->data), - DMA_FROM_DEVICE); - - status = spi_sync_locked(host->spi, &host->readback); - - if (host->dma_dev) - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*host->data), - DMA_FROM_DEVICE); - - return status; + return spi_sync_locked(host->spi, &host->readback); } static int mmc_spi_skip(struct mmc_spi_host *host, unsigned long timeout, @@ -506,23 +486,11 @@ mmc_spi_command_send(struct mmc_spi_host *host, t = &host->t; memset(t, 0, sizeof(*t)); t->tx_buf = t->rx_buf = data->status; - t->tx_dma = t->rx_dma = host->data_dma; t->len = cp - data->status; t->cs_change = 1; spi_message_add_tail(t, &host->m); - if (host->dma_dev) { - host->m.is_dma_mapped = 1; - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*host->data), - DMA_BIDIRECTIONAL); - } status = spi_sync_locked(host->spi, &host->m); - - if (host->dma_dev) - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*host->data), - DMA_BIDIRECTIONAL); if (status < 0) { dev_dbg(&host->spi->dev, " ... write returned %d\n", status); cmd->error = status; @@ -540,9 +508,6 @@ mmc_spi_command_send(struct mmc_spi_host *host, * We always provide TX data for data and CRC. The MMC/SD protocol * requires us to write ones; but Linux defaults to writing zeroes; * so we explicitly initialize it to all ones on RX paths. - * - * We also handle DMA mapping, so the underlying SPI controller does - * not need to (re)do it for each message. */ static void mmc_spi_setup_data_message( @@ -552,11 +517,8 @@ mmc_spi_setup_data_message( { struct spi_transfer *t; struct scratch *scratch = host->data; - dma_addr_t dma = host->data_dma; spi_message_init(&host->m); - if (dma) - host->m.is_dma_mapped = 1; /* for reads, readblock() skips 0xff bytes before finding * the token; for writes, this transfer issues that token. @@ -570,8 +532,6 @@ mmc_spi_setup_data_message( else scratch->data_token = SPI_TOKEN_SINGLE; t->tx_buf = &scratch->data_token; - if (dma) - t->tx_dma = dma + offsetof(struct scratch, data_token); spi_message_add_tail(t, &host->m); } @@ -581,7 +541,6 @@ mmc_spi_setup_data_message( t = &host->t; memset(t, 0, sizeof(*t)); t->tx_buf = host->ones; - t->tx_dma = host->ones_dma; /* length and actual buffer info are written later */ spi_message_add_tail(t, &host->m); @@ -591,14 +550,9 @@ mmc_spi_setup_data_message( if (direction == DMA_TO_DEVICE) { /* the actual CRC may get written later */ t->tx_buf = &scratch->crc_val; - if (dma) - t->tx_dma = dma + offsetof(struct scratch, crc_val); } else { t->tx_buf = host->ones; - t->tx_dma = host->ones_dma; t->rx_buf = &scratch->crc_val; - if (dma) - t->rx_dma = dma + offsetof(struct scratch, crc_val); } spi_message_add_tail(t, &host->m); @@ -621,10 +575,7 @@ mmc_spi_setup_data_message( memset(t, 0, sizeof(*t)); t->len = (direction == DMA_TO_DEVICE) ? sizeof(scratch->status) : 1; t->tx_buf = host->ones; - t->tx_dma = host->ones_dma; t->rx_buf = scratch->status; - if (dma) - t->rx_dma = dma + offsetof(struct scratch, status); t->cs_change = 1; spi_message_add_tail(t, &host->m); } @@ -653,23 +604,13 @@ mmc_spi_writeblock(struct mmc_spi_host *host, struct spi_transfer *t, if (host->mmc->use_spi_crc) scratch->crc_val = cpu_to_be16(crc_itu_t(0, t->tx_buf, t->len)); - if (host->dma_dev) - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); status = spi_sync_locked(spi, &host->m); - if (status != 0) { dev_dbg(&spi->dev, "write error (%d)\n", status); return status; } - if (host->dma_dev) - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - /* * Get the transmission data-response reply. It must follow * immediately after the data block we transferred. This reply @@ -718,8 +659,6 @@ mmc_spi_writeblock(struct mmc_spi_host *host, struct spi_transfer *t, } t->tx_buf += t->len; - if (host->dma_dev) - t->tx_dma += t->len; /* Return when not busy. If we didn't collect that status yet, * we'll need some more I/O. @@ -783,30 +722,12 @@ mmc_spi_readblock(struct mmc_spi_host *host, struct spi_transfer *t, } leftover = status << 1; - if (host->dma_dev) { - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - dma_sync_single_for_device(host->dma_dev, - t->rx_dma, t->len, - DMA_FROM_DEVICE); - } - status = spi_sync_locked(spi, &host->m); if (status < 0) { dev_dbg(&spi->dev, "read error %d\n", status); return status; } - if (host->dma_dev) { - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - dma_sync_single_for_cpu(host->dma_dev, - t->rx_dma, t->len, - DMA_FROM_DEVICE); - } - if (bitshift) { /* Walk through the data and the crc and do * all the magic to get byte-aligned data. @@ -841,8 +762,6 @@ mmc_spi_readblock(struct mmc_spi_host *host, struct spi_transfer *t, } t->rx_buf += t->len; - if (host->dma_dev) - t->rx_dma += t->len; return 0; } @@ -857,7 +776,6 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, struct mmc_data *data, u32 blk_size) { struct spi_device *spi = host->spi; - struct device *dma_dev = host->dma_dev; struct spi_transfer *t; enum dma_data_direction direction = mmc_get_dma_dir(data); struct scatterlist *sg; @@ -884,31 +802,8 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, */ for_each_sg(data->sg, sg, data->sg_len, n_sg) { int status = 0; - dma_addr_t dma_addr = 0; void *kmap_addr; unsigned length = sg->length; - enum dma_data_direction dir = direction; - - /* set up dma mapping for controller drivers that might - * use DMA ... though they may fall back to PIO - */ - if (dma_dev) { - /* never invalidate whole *shared* pages ... */ - if ((sg->offset != 0 || length != PAGE_SIZE) - && dir == DMA_FROM_DEVICE) - dir = DMA_BIDIRECTIONAL; - - dma_addr = dma_map_page(dma_dev, sg_page(sg), 0, - PAGE_SIZE, dir); - if (dma_mapping_error(dma_dev, dma_addr)) { - data->error = -EFAULT; - break; - } - if (direction == DMA_TO_DEVICE) - t->tx_dma = dma_addr + sg->offset; - else - t->rx_dma = dma_addr + sg->offset; - } /* allow pio too; we don't allow highmem */ kmap_addr = kmap(sg_page(sg)); @@ -941,8 +836,6 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, if (direction == DMA_FROM_DEVICE) flush_dcache_page(sg_page(sg)); kunmap(sg_page(sg)); - if (dma_dev) - dma_unmap_page(dma_dev, dma_addr, PAGE_SIZE, dir); if (status < 0) { data->error = status; @@ -977,21 +870,9 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, scratch->status[0] = SPI_TOKEN_STOP_TRAN; host->early_status.tx_buf = host->early_status.rx_buf; - host->early_status.tx_dma = host->early_status.rx_dma; host->early_status.len = statlen; - if (host->dma_dev) - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - tmp = spi_sync_locked(spi, &host->m); - - if (host->dma_dev) - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - if (tmp < 0) { if (!data->error) data->error = tmp; @@ -1265,52 +1146,6 @@ mmc_spi_detect_irq(int irq, void *mmc) return IRQ_HANDLED; } -#ifdef CONFIG_HAS_DMA -static int mmc_spi_dma_alloc(struct mmc_spi_host *host) -{ - struct spi_device *spi = host->spi; - struct device *dev; - - if (!spi->master->dev.parent->dma_mask) - return 0; - - dev = spi->master->dev.parent; - - host->ones_dma = dma_map_single(dev, host->ones, MMC_SPI_BLOCKSIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, host->ones_dma)) - return -ENOMEM; - - host->data_dma = dma_map_single(dev, host->data, sizeof(*host->data), - DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, host->data_dma)) { - dma_unmap_single(dev, host->ones_dma, MMC_SPI_BLOCKSIZE, - DMA_TO_DEVICE); - return -ENOMEM; - } - - dma_sync_single_for_cpu(dev, host->data_dma, sizeof(*host->data), - DMA_BIDIRECTIONAL); - - host->dma_dev = dev; - return 0; -} - -static void mmc_spi_dma_free(struct mmc_spi_host *host) -{ - if (!host->dma_dev) - return; - - dma_unmap_single(host->dma_dev, host->ones_dma, MMC_SPI_BLOCKSIZE, - DMA_TO_DEVICE); - dma_unmap_single(host->dma_dev, host->data_dma, sizeof(*host->data), - DMA_BIDIRECTIONAL); -} -#else -static inline int mmc_spi_dma_alloc(struct mmc_spi_host *host) { return 0; } -static inline void mmc_spi_dma_free(struct mmc_spi_host *host) {} -#endif - static int mmc_spi_probe(struct spi_device *spi) { void *ones; @@ -1402,24 +1237,17 @@ static int mmc_spi_probe(struct spi_device *spi) host->powerup_msecs = 250; } - /* preallocate dma buffers */ + /* Preallocate buffers */ host->data = kmalloc(sizeof(*host->data), GFP_KERNEL); if (!host->data) goto fail_nobuf1; - status = mmc_spi_dma_alloc(host); - if (status) - goto fail_dma; - /* setup message for status/busy readback */ spi_message_init(&host->readback); - host->readback.is_dma_mapped = (host->dma_dev != NULL); spi_message_add_tail(&host->status, &host->readback); host->status.tx_buf = host->ones; - host->status.tx_dma = host->ones_dma; host->status.rx_buf = &host->data->status; - host->status.rx_dma = host->data_dma + offsetof(struct scratch, status); host->status.cs_change = 1; /* register card detect irq */ @@ -1464,9 +1292,8 @@ static int mmc_spi_probe(struct spi_device *spi) if (!status) has_ro = true; - dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n", + dev_info(&spi->dev, "SD/MMC host %s%s%s%s\n", dev_name(&mmc->class_dev), - host->dma_dev ? "" : ", no DMA", has_ro ? "" : ", no WP", (host->pdata && host->pdata->setpower) ? "" : ", no poweroff", @@ -1477,8 +1304,6 @@ static int mmc_spi_probe(struct spi_device *spi) fail_gpiod_request: mmc_remove_host(mmc); fail_glue_init: - mmc_spi_dma_free(host); -fail_dma: kfree(host->data); fail_nobuf1: mmc_spi_put_pdata(spi); @@ -1500,7 +1325,6 @@ static void mmc_spi_remove(struct spi_device *spi) mmc_remove_host(mmc); - mmc_spi_dma_free(host); kfree(host->data); kfree(host->ones); diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 60bca78a72b1..0511583ffa76 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -200,6 +200,8 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) struct scatterlist *sg; int i; + host->dma_in_progress = true; + if (!host->variant->dma_lli || data->sg_len == 1 || idma->use_bounce_buffer) { u32 dma_addr; @@ -238,9 +240,30 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) return 0; } +static void sdmmc_idma_error(struct mmci_host *host) +{ + struct mmc_data *data = host->data; + struct sdmmc_idma *idma = host->dma_priv; + + if (!dma_inprogress(host)) + return; + + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; + data->host_cookie = 0; + + if (!idma->use_bounce_buffer) + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + mmc_get_dma_dir(data)); +} + static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data) { + if (!dma_inprogress(host)) + return; + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; if (!data->host_cookie) sdmmc_idma_unprep_data(host, data, 0); @@ -567,6 +590,7 @@ static struct mmci_host_ops sdmmc_variant_ops = { .dma_setup = sdmmc_idma_setup, .dma_start = sdmmc_idma_start, .dma_finalize = sdmmc_idma_finalize, + .dma_error = sdmmc_idma_error, .set_clkreg = mmci_sdmmc_set_clkreg, .set_pwrreg = mmci_sdmmc_set_pwrreg, .busy_complete = sdmmc_busy_complete, diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index 033be559a730..bfb7c8b96341 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -1442,6 +1442,9 @@ static int __maybe_unused sdhci_omap_runtime_suspend(struct device *dev) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); + if (host->tuning_mode != SDHCI_TUNING_MODE_3) + mmc_retune_needed(host->mmc); + if (omap_host->con != -EINVAL) sdhci_runtime_suspend_host(host); diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index bca1d095b759..24bb0e9809e7 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -602,6 +602,35 @@ static void sdhci_pci_o2_set_clock(struct sdhci_host *host, unsigned int clock) sdhci_o2_enable_clk(host, clk); } +static void sdhci_pci_o2_set_power(struct sdhci_host *host, unsigned char mode, unsigned short vdd) +{ + struct sdhci_pci_chip *chip; + struct sdhci_pci_slot *slot = sdhci_priv(host); + u32 scratch_32 = 0; + u8 scratch_8 = 0; + + chip = slot->chip; + + if (mode == MMC_POWER_OFF) { + /* UnLock WP */ + pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8); + scratch_8 &= 0x7f; + pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8); + + /* Set PCR 0x354[16] to switch Clock Source back to OPE Clock */ + pci_read_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, &scratch_32); + scratch_32 &= ~(O2_SD_SEL_DLL); + pci_write_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, scratch_32); + + /* Lock WP */ + pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8); + scratch_8 |= 0x80; + pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8); + } + + sdhci_set_power(host, mode, vdd); +} + static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot) { struct sdhci_pci_chip *chip; @@ -911,6 +940,7 @@ static const struct sdhci_ops sdhci_pci_o2_ops = { .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_power = sdhci_pci_o2_set_power, }; const struct sdhci_pci_fixes sdhci_o2 = { diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index 8cf3a375de65..cc9d28b75eb9 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "sdhci-pltfm.h" @@ -109,6 +110,8 @@ #define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18) #define XENON_LOGIC_TIMING_VALUE 0x00AA8977 +#define XENON_MAX_PHY_TIMEOUT_LOOPS 100 + /* * List offset of PHY registers and some special register values * in eMMC PHY 5.0 or eMMC PHY 5.1 @@ -216,6 +219,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host) return 0; } +static int xenon_check_stability_internal_clk(struct sdhci_host *host) +{ + u32 reg; + int err; + + err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE, + 1100, 20000, false, host, SDHCI_CLOCK_CONTROL); + if (err) + dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n"); + + return err; +} + /* * eMMC 5.0/5.1 PHY init/re-init. * eMMC PHY init should be executed after: @@ -232,6 +248,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host); struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs; + int ret = xenon_check_stability_internal_clk(host); + + if (ret) + return ret; + reg = sdhci_readl(host, phy_regs->timing_adj); reg |= XENON_PHY_INITIALIZAION; sdhci_writel(host, reg, phy_regs->timing_adj); @@ -259,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) /* get the wait time */ wait /= clock; wait++; - /* wait for host eMMC PHY init completes */ - udelay(wait); - reg = sdhci_readl(host, phy_regs->timing_adj); - reg &= XENON_PHY_INITIALIZAION; - if (reg) { + /* + * AC5X spec says bit must be polled until zero. + * We see cases in which timeout can take longer + * than the standard calculation on AC5X, which is + * expected following the spec comment above. + * According to the spec, we must wait as long as + * it takes for that bit to toggle on AC5X. + * Cap that with 100 delay loops so we won't get + * stuck here forever: + */ + + ret = read_poll_timeout(sdhci_readl, reg, + !(reg & XENON_PHY_INITIALIZAION), + wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait, + false, host, phy_regs->timing_adj); + if (ret) dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n", - wait); - return -ETIMEDOUT; - } + wait * XENON_MAX_PHY_TIMEOUT_LOOPS); - return 0; + return ret; } #define ARMADA_3700_SOC_PAD_1_8V 0x1 diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 437048bb8027..5024cae411d3 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -259,6 +259,8 @@ static void tmio_mmc_reset_work(struct work_struct *work) else mrq->cmd->error = -ETIMEDOUT; + /* No new calls yet, but disallow concurrent tmio_mmc_done_work() */ + host->mrq = ERR_PTR(-EBUSY); host->cmd = NULL; host->data = NULL; diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c index 9aa3027ca25e..f2abebb2d857 100644 --- a/drivers/mmc/host/wmt-sdmmc.c +++ b/drivers/mmc/host/wmt-sdmmc.c @@ -886,7 +886,6 @@ static int wmt_mci_remove(struct platform_device *pdev) { struct mmc_host *mmc; struct wmt_mci_priv *priv; - struct resource *res; u32 reg_tmp; mmc = platform_get_drvdata(pdev); @@ -914,9 +913,6 @@ static int wmt_mci_remove(struct platform_device *pdev) clk_disable_unprepare(priv->clk_sdmmc); clk_put(priv->clk_sdmmc); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - mmc_free_host(mmc); dev_info(&pdev->dev, "WMT MCI device removed\n"); diff --git a/drivers/mtd/maps/physmap-core.c b/drivers/mtd/maps/physmap-core.c index 19dad5a23f94..8cdb3512107d 100644 --- a/drivers/mtd/maps/physmap-core.c +++ b/drivers/mtd/maps/physmap-core.c @@ -524,7 +524,7 @@ static int physmap_flash_probe(struct platform_device *dev) if (!info->maps[i].phys) info->maps[i].phys = res->start; - info->win_order = get_bitmask_order(resource_size(res)) - 1; + info->win_order = fls64(resource_size(res)) - 1; info->maps[i].size = BIT(info->win_order + (info->gpios ? info->gpios->ndescs : 0)); diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c index 452ecaf7775a..1cfe3dd0bad4 100644 --- a/drivers/mtd/nand/raw/lpc32xx_mlc.c +++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c @@ -303,8 +303,9 @@ static int lpc32xx_nand_device_ready(struct nand_chip *nand_chip) return 0; } -static irqreturn_t lpc3xxx_nand_irq(int irq, struct lpc32xx_nand_host *host) +static irqreturn_t lpc3xxx_nand_irq(int irq, void *data) { + struct lpc32xx_nand_host *host = data; uint8_t sr; /* Clear interrupt flag by reading status */ @@ -779,7 +780,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev) goto release_dma_chan; } - if (request_irq(host->irq, (irq_handler_t)&lpc3xxx_nand_irq, + if (request_irq(host->irq, &lpc3xxx_nand_irq, IRQF_TRIGGER_HIGH, DRV_NAME, host)) { dev_err(&pdev->dev, "Error requesting NAND IRQ\n"); res = -ENXIO; diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index 0aeac8ccbd0e..05925fb69460 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -63,7 +63,7 @@ #define CMDRWGEN(cmd_dir, ran, bch, short_mode, page_size, pages) \ ( \ (cmd_dir) | \ - ((ran) << 19) | \ + (ran) | \ ((bch) << 14) | \ ((short_mode) << 13) | \ (((page_size) & 0x7f) << 6) | \ diff --git a/drivers/mtd/parsers/ofpart_core.c b/drivers/mtd/parsers/ofpart_core.c index 192190c42fc8..e7b8e9d0a910 100644 --- a/drivers/mtd/parsers/ofpart_core.c +++ b/drivers/mtd/parsers/ofpart_core.c @@ -122,6 +122,25 @@ static int parse_fixed_partitions(struct mtd_info *master, a_cells = of_n_addr_cells(pp); s_cells = of_n_size_cells(pp); + if (!dedicated && s_cells == 0) { + /* + * This is a ugly workaround to not create + * regression on devices that are still creating + * partitions as direct children of the nand controller. + * This can happen in case the nand controller node has + * #size-cells equal to 0 and the firmware (e.g. + * U-Boot) just add the partitions there assuming + * 32-bit addressing. + * + * If you get this warning your firmware and/or DTS + * should be really fixed. + * + * This is working only for devices smaller than 4GiB. + */ + pr_warn("%s: ofpart partition %pOF (%pOF) #size-cells is wrongly set to <0>, assuming <1> for parsing partitions.\n", + master->name, pp, mtd_node); + s_cells = 1; + } if (len / 4 != a_cells + s_cells) { pr_debug("%s: ofpart partition %pOF (%pOF) error parsing reg property.\n", master->name, pp, diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index ca2d9efe62c3..1060e19205d2 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -85,9 +85,10 @@ size_t ubi_calc_fm_size(struct ubi_device *ubi) sizeof(struct ubi_fm_scan_pool) + sizeof(struct ubi_fm_scan_pool) + (ubi->peb_count * sizeof(struct ubi_fm_ec)) + - (sizeof(struct ubi_fm_eba) + - (ubi->peb_count * sizeof(__be32))) + - sizeof(struct ubi_fm_volhdr) * UBI_MAX_VOLUMES; + ((sizeof(struct ubi_fm_eba) + + sizeof(struct ubi_fm_volhdr)) * + (UBI_MAX_VOLUMES + UBI_INT_VOL_COUNT)) + + (ubi->peb_count * sizeof(__be32)); return roundup(size, ubi->leb_size); } diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index f700f0e4f2ec..6e5489e233dd 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -791,6 +791,12 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_attach_info *ai) * The number of supported volumes is limited by the eraseblock size * and by the UBI_MAX_VOLUMES constant. */ + + if (ubi->leb_size < UBI_VTBL_RECORD_SIZE) { + ubi_err(ubi, "LEB size too small for a volume record"); + return -EINVAL; + } + ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE; if (ubi->vtbl_slots > UBI_MAX_VOLUMES) ubi->vtbl_slots = UBI_MAX_VOLUMES; diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index fc5da5d7744d..9c4c2c7d90ef 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -985,7 +985,8 @@ static int alb_upper_dev_walk(struct net_device *upper, if (netif_is_macvlan(upper) && !strict_match) { tags = bond_verify_device_path(bond->dev, upper, 0); if (IS_ERR_OR_NULL(tags)) - BUG(); + return -ENOMEM; + alb_send_lp_vid(slave, upper->dev_addr, tags[0].vlan_proto, tags[0].vlan_id); kfree(tags); diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 8efa22d9f214..053d375eae4f 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -311,7 +311,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], /* Neither of TDC parameters nor TDC flags are * provided: do calculation */ - can_calc_tdco(&priv->tdc, priv->tdc_const, &priv->data_bittiming, + can_calc_tdco(&priv->tdc, priv->tdc_const, &dbt, &priv->ctrlmode, priv->ctrlmode_supported); } /* else: both CAN_CTRLMODE_TDC_{AUTO,MANUAL} are explicitly * turned off. TDC is disabled: do nothing diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index c63e082dc57d..934600eccbaf 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -49,9 +49,9 @@ static int ksz8_ind_write8(struct ksz_device *dev, u8 table, u16 addr, u8 data) mutex_lock(&dev->alu_mutex); ctrl_addr = IND_ACC_TABLE(table) | addr; - ret = ksz_write8(dev, regs[REG_IND_BYTE], data); + ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); if (!ret) - ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); + ret = ksz_write8(dev, regs[REG_IND_BYTE], data); mutex_unlock(&dev->alu_mutex); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b988c8a40d53..07065c1af55e 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -998,20 +998,56 @@ unlock_exit: mutex_unlock(&priv->reg_mutex); } +/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std + * 802.1Qâ„¢-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA + * must only be propagated to C-VLAN and MAC Bridge components. That means + * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports, + * these frames are supposed to be processed by the CPU (software). So we make + * the switch only forward them to the CPU port. And if received from a CPU + * port, forward to a single port. The software is responsible of making the + * switch conform to the latter by setting a single port as destination port on + * the special tag. + * + * This switch intellectual property cannot conform to this part of the standard + * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC + * DAs, it also includes :22-FF which the scope of propagation is not supposed + * to be restricted for these MAC DAs. + */ static void mt753x_trap_frames(struct mt7530_priv *priv) { - /* Trap BPDUs to the CPU port(s) */ - mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK, + /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them + * VLAN-untagged. + */ + mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK | + MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK | + MT753X_BPDU_PORT_FW_MASK, + MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | MT753X_BPDU_CPU_ONLY); - /* Trap 802.1X PAE frames to the CPU port(s) */ - mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_PORT_FW_MASK, - MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY)); + /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress + * them VLAN-untagged. + */ + mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK | + MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK | + MT753X_R01_PORT_FW_MASK, + MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); - /* Trap LLDP frames with :0E MAC DA to the CPU port(s) */ - mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_PORT_FW_MASK, - MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY)); + /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress + * them VLAN-untagged. + */ + mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK | + MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK | + MT753X_R03_PORT_FW_MASK, + MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); } static int @@ -2187,11 +2223,11 @@ mt7530_setup(struct dsa_switch *ds) */ if (priv->mcm) { reset_control_assert(priv->rstc); - usleep_range(1000, 1100); + usleep_range(5000, 5100); reset_control_deassert(priv->rstc); } else { gpiod_set_value_cansleep(priv->reset, 0); - usleep_range(1000, 1100); + usleep_range(5000, 5100); gpiod_set_value_cansleep(priv->reset, 1); } @@ -2401,11 +2437,11 @@ mt7531_setup(struct dsa_switch *ds) */ if (priv->mcm) { reset_control_assert(priv->rstc); - usleep_range(1000, 1100); + usleep_range(5000, 5100); reset_control_deassert(priv->rstc); } else { gpiod_set_value_cansleep(priv->reset, 0); - usleep_range(1000, 1100); + usleep_range(5000, 5100); gpiod_set_value_cansleep(priv->reset, 1); } diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 6202b0f8c3f3..fa2afa67ceb0 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -63,14 +63,33 @@ enum mt753x_id { /* Registers for BPDU and PAE frame control*/ #define MT753X_BPC 0x24 -#define MT753X_BPDU_PORT_FW_MASK GENMASK(2, 0) +#define MT753X_PAE_EG_TAG_MASK GENMASK(24, 22) +#define MT753X_PAE_EG_TAG(x) FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x) #define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16) #define MT753X_PAE_PORT_FW(x) FIELD_PREP(MT753X_PAE_PORT_FW_MASK, x) +#define MT753X_BPDU_EG_TAG_MASK GENMASK(8, 6) +#define MT753X_BPDU_EG_TAG(x) FIELD_PREP(MT753X_BPDU_EG_TAG_MASK, x) +#define MT753X_BPDU_PORT_FW_MASK GENMASK(2, 0) + +/* Register for :01 and :02 MAC DA frame control */ +#define MT753X_RGAC1 0x28 +#define MT753X_R02_EG_TAG_MASK GENMASK(24, 22) +#define MT753X_R02_EG_TAG(x) FIELD_PREP(MT753X_R02_EG_TAG_MASK, x) +#define MT753X_R02_PORT_FW_MASK GENMASK(18, 16) +#define MT753X_R02_PORT_FW(x) FIELD_PREP(MT753X_R02_PORT_FW_MASK, x) +#define MT753X_R01_EG_TAG_MASK GENMASK(8, 6) +#define MT753X_R01_EG_TAG(x) FIELD_PREP(MT753X_R01_EG_TAG_MASK, x) +#define MT753X_R01_PORT_FW_MASK GENMASK(2, 0) /* Register for :03 and :0E MAC DA frame control */ #define MT753X_RGAC2 0x2c +#define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22) +#define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x) #define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x) +#define MT753X_R03_EG_TAG_MASK GENMASK(8, 6) +#define MT753X_R03_EG_TAG(x) FIELD_PREP(MT753X_R03_EG_TAG_MASK, x) +#define MT753X_R03_PORT_FW_MASK GENMASK(2, 0) enum mt753x_bpdu_port_fw { MT753X_BPDU_FOLLOW_MFC, @@ -251,6 +270,7 @@ enum mt7530_port_mode { enum mt7530_vlan_port_eg_tag { MT7530_VLAN_EG_DISABLED = 0, MT7530_VLAN_EG_CONSISTENT = 1, + MT7530_VLAN_EG_UNTAGGED = 4, }; enum mt7530_vlan_port_attr { diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index e693154cf803..97a47d8743fd 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -601,8 +601,8 @@ struct mv88e6xxx_ops { int (*serdes_get_sset_count)(struct mv88e6xxx_chip *chip, int port); int (*serdes_get_strings)(struct mv88e6xxx_chip *chip, int port, uint8_t *data); - int (*serdes_get_stats)(struct mv88e6xxx_chip *chip, int port, - uint64_t *data); + size_t (*serdes_get_stats)(struct mv88e6xxx_chip *chip, int port, + uint64_t *data); /* SERDES registers for ethtool */ int (*serdes_get_regs_len)(struct mv88e6xxx_chip *chip, int port); diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index d94150d8f3f4..4c38df4982bf 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -342,8 +342,8 @@ static uint64_t mv88e6352_serdes_get_stat(struct mv88e6xxx_chip *chip, return val; } -int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, - uint64_t *data) +size_t mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, + uint64_t *data) { struct mv88e6xxx_port *mv88e6xxx_port = &chip->ports[port]; struct mv88e6352_serdes_hw_stat *stat; @@ -352,7 +352,7 @@ int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, err = mv88e6352_g2_scratch_port_has_serdes(chip, port); if (err <= 0) - return err; + return 0; BUILD_BUG_ON(ARRAY_SIZE(mv88e6352_serdes_hw_stats) > ARRAY_SIZE(mv88e6xxx_port->serdes_stats)); @@ -798,8 +798,8 @@ static uint64_t mv88e6390_serdes_get_stat(struct mv88e6xxx_chip *chip, int lane, return reg[0] | ((u64)reg[1] << 16) | ((u64)reg[2] << 32); } -int mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, - uint64_t *data) +size_t mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, + uint64_t *data) { struct mv88e6390_serdes_hw_stat *stat; int lane; diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h index 29bb4e91e2f6..67369054951f 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.h +++ b/drivers/net/dsa/mv88e6xxx/serdes.h @@ -165,13 +165,13 @@ irqreturn_t mv88e6393x_serdes_irq_status(struct mv88e6xxx_chip *chip, int port, int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port); int mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip, int port, uint8_t *data); -int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, - uint64_t *data); +size_t mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, + uint64_t *data); int mv88e6390_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port); int mv88e6390_serdes_get_strings(struct mv88e6xxx_chip *chip, int port, uint8_t *data); -int mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, - uint64_t *data); +size_t mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, + uint64_t *data); int mv88e6352_serdes_get_regs_len(struct mv88e6xxx_chip *chip, int port); void mv88e6352_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p); diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index 1e94ba1031ec..641692f716f8 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -842,10 +842,15 @@ qca8k_mdio_register(struct qca8k_priv *priv) struct dsa_switch *ds = priv->ds; struct device_node *mdio; struct mii_bus *bus; + int err; + + mdio = of_get_child_by_name(priv->dev->of_node, "mdio"); bus = devm_mdiobus_alloc(ds->dev); - if (!bus) - return -ENOMEM; + if (!bus) { + err = -ENOMEM; + goto out_put_node; + } bus->priv = (void *)priv; snprintf(bus->id, MII_BUS_ID_SIZE, "qca8k-%d.%d", @@ -855,12 +860,12 @@ qca8k_mdio_register(struct qca8k_priv *priv) ds->slave_mii_bus = bus; /* Check if the devicetree declare the port:phy mapping */ - mdio = of_get_child_by_name(priv->dev->of_node, "mdio"); if (of_device_is_available(mdio)) { bus->name = "qca8k slave mii"; bus->read = qca8k_internal_mdio_read; bus->write = qca8k_internal_mdio_write; - return devm_of_mdiobus_register(priv->dev, bus, mdio); + err = devm_of_mdiobus_register(priv->dev, bus, mdio); + goto out_put_node; } /* If a mapping can't be found the legacy mapping is used, @@ -869,7 +874,13 @@ qca8k_mdio_register(struct qca8k_priv *priv) bus->name = "qca8k-legacy slave mii"; bus->read = qca8k_legacy_mdio_read; bus->write = qca8k_legacy_mdio_write; - return devm_mdiobus_register(priv->dev, bus); + + err = devm_mdiobus_register(priv->dev, bus); + +out_put_node: + of_node_put(mdio); + + return err; } static int @@ -1924,12 +1935,11 @@ qca8k_sw_probe(struct mdio_device *mdiodev) priv->info = of_device_get_match_data(priv->dev); priv->reset_gpio = devm_gpiod_get_optional(priv->dev, "reset", - GPIOD_ASIS); + GPIOD_OUT_HIGH); if (IS_ERR(priv->reset_gpio)) return PTR_ERR(priv->reset_gpio); if (priv->reset_gpio) { - gpiod_set_value_cansleep(priv->reset_gpio, 1); /* The active low duration must be greater than 10 ms * and checkpatch.pl wants 20 ms. */ diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 1917da784191..5a274b99f299 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -84,7 +84,6 @@ source "drivers/net/ethernet/huawei/Kconfig" source "drivers/net/ethernet/i825xx/Kconfig" source "drivers/net/ethernet/ibm/Kconfig" source "drivers/net/ethernet/intel/Kconfig" -source "drivers/net/ethernet/wangxun/Kconfig" source "drivers/net/ethernet/xscale/Kconfig" config JME @@ -189,6 +188,7 @@ source "drivers/net/ethernet/toshiba/Kconfig" source "drivers/net/ethernet/tundra/Kconfig" source "drivers/net/ethernet/vertexcom/Kconfig" source "drivers/net/ethernet/via/Kconfig" +source "drivers/net/ethernet/wangxun/Kconfig" source "drivers/net/ethernet/wiznet/Kconfig" source "drivers/net/ethernet/xilinx/Kconfig" source "drivers/net/ethernet/xircom/Kconfig" diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 044b8afde69a..9e82e7b9c3b7 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3174,22 +3174,6 @@ error_drop_packet: return NETDEV_TX_OK; } -static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) -{ - u16 qid; - /* we suspect that this is good for in--kernel network services that - * want to loop incoming skb rx to tx in normal user generated traffic, - * most probably we will not get to this - */ - if (skb_rx_queue_recorded(skb)) - qid = skb_get_rx_queue(skb); - else - qid = netdev_pick_tx(dev, skb, NULL); - - return qid; -} - static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { struct device *dev = &pdev->dev; @@ -3359,7 +3343,6 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_open = ena_open, .ndo_stop = ena_close, .ndo_start_xmit = ena_start_xmit, - .ndo_select_queue = ena_select_queue, .ndo_get_stats64 = ena_get_stats64, .ndo_tx_timeout = ena_tx_timeout, .ndo_change_mtu = ena_change_mtu, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c index 28c9b6f1a54f..5acb3e16b567 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c @@ -953,8 +953,6 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic) { struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp; unsigned int tx_ring_idx, rx_ring_idx; - struct aq_ring_s *hwts; - struct aq_ring_s *ring; int err; if (!aq_ptp) @@ -962,29 +960,23 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic) tx_ring_idx = aq_ptp_ring_idx(aq_nic->aq_nic_cfg.tc_mode); - ring = aq_ring_tx_alloc(&aq_ptp->ptp_tx, aq_nic, - tx_ring_idx, &aq_nic->aq_nic_cfg); - if (!ring) { - err = -ENOMEM; + err = aq_ring_tx_alloc(&aq_ptp->ptp_tx, aq_nic, + tx_ring_idx, &aq_nic->aq_nic_cfg); + if (err) goto err_exit; - } rx_ring_idx = aq_ptp_ring_idx(aq_nic->aq_nic_cfg.tc_mode); - ring = aq_ring_rx_alloc(&aq_ptp->ptp_rx, aq_nic, - rx_ring_idx, &aq_nic->aq_nic_cfg); - if (!ring) { - err = -ENOMEM; + err = aq_ring_rx_alloc(&aq_ptp->ptp_rx, aq_nic, + rx_ring_idx, &aq_nic->aq_nic_cfg); + if (err) goto err_exit_ptp_tx; - } - hwts = aq_ring_hwts_rx_alloc(&aq_ptp->hwts_rx, aq_nic, PTP_HWST_RING_IDX, - aq_nic->aq_nic_cfg.rxds, - aq_nic->aq_nic_cfg.aq_hw_caps->rxd_size); - if (!hwts) { - err = -ENOMEM; + err = aq_ring_hwts_rx_alloc(&aq_ptp->hwts_rx, aq_nic, PTP_HWST_RING_IDX, + aq_nic->aq_nic_cfg.rxds, + aq_nic->aq_nic_cfg.aq_hw_caps->rxd_size); + if (err) goto err_exit_ptp_rx; - } err = aq_ptp_skb_ring_init(&aq_ptp->skb_ring, aq_nic->aq_nic_cfg.rxds); if (err != 0) { @@ -1001,7 +993,7 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic) return 0; err_exit_hwts_rx: - aq_ring_free(&aq_ptp->hwts_rx); + aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); err_exit_ptp_rx: aq_ring_free(&aq_ptp->ptp_rx); err_exit_ptp_tx: @@ -1019,7 +1011,7 @@ void aq_ptp_ring_free(struct aq_nic_s *aq_nic) aq_ring_free(&aq_ptp->ptp_tx); aq_ring_free(&aq_ptp->ptp_rx); - aq_ring_free(&aq_ptp->hwts_rx); + aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); aq_ptp_skb_ring_release(&aq_ptp->skb_ring); } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 4d9d7d1edb9b..0eaaba3a18ee 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -132,8 +132,8 @@ static int aq_get_rxpages(struct aq_ring_s *self, struct aq_ring_buff_s *rxbuf) return 0; } -static struct aq_ring_s *aq_ring_alloc(struct aq_ring_s *self, - struct aq_nic_s *aq_nic) +static int aq_ring_alloc(struct aq_ring_s *self, + struct aq_nic_s *aq_nic) { int err = 0; @@ -156,46 +156,29 @@ static struct aq_ring_s *aq_ring_alloc(struct aq_ring_s *self, err_exit: if (err < 0) { aq_ring_free(self); - self = NULL; } - return self; + return err; } -struct aq_ring_s *aq_ring_tx_alloc(struct aq_ring_s *self, - struct aq_nic_s *aq_nic, - unsigned int idx, - struct aq_nic_cfg_s *aq_nic_cfg) +int aq_ring_tx_alloc(struct aq_ring_s *self, + struct aq_nic_s *aq_nic, + unsigned int idx, + struct aq_nic_cfg_s *aq_nic_cfg) { - int err = 0; - self->aq_nic = aq_nic; self->idx = idx; self->size = aq_nic_cfg->txds; self->dx_size = aq_nic_cfg->aq_hw_caps->txd_size; - self = aq_ring_alloc(self, aq_nic); - if (!self) { - err = -ENOMEM; - goto err_exit; - } - -err_exit: - if (err < 0) { - aq_ring_free(self); - self = NULL; - } - - return self; + return aq_ring_alloc(self, aq_nic); } -struct aq_ring_s *aq_ring_rx_alloc(struct aq_ring_s *self, - struct aq_nic_s *aq_nic, - unsigned int idx, - struct aq_nic_cfg_s *aq_nic_cfg) +int aq_ring_rx_alloc(struct aq_ring_s *self, + struct aq_nic_s *aq_nic, + unsigned int idx, + struct aq_nic_cfg_s *aq_nic_cfg) { - int err = 0; - self->aq_nic = aq_nic; self->idx = idx; self->size = aq_nic_cfg->rxds; @@ -217,22 +200,10 @@ struct aq_ring_s *aq_ring_rx_alloc(struct aq_ring_s *self, self->tail_size = 0; } - self = aq_ring_alloc(self, aq_nic); - if (!self) { - err = -ENOMEM; - goto err_exit; - } - -err_exit: - if (err < 0) { - aq_ring_free(self); - self = NULL; - } - - return self; + return aq_ring_alloc(self, aq_nic); } -struct aq_ring_s * +int aq_ring_hwts_rx_alloc(struct aq_ring_s *self, struct aq_nic_s *aq_nic, unsigned int idx, unsigned int size, unsigned int dx_size) { @@ -250,10 +221,10 @@ aq_ring_hwts_rx_alloc(struct aq_ring_s *self, struct aq_nic_s *aq_nic, GFP_KERNEL); if (!self->dx_ring) { aq_ring_free(self); - return NULL; + return -ENOMEM; } - return self; + return 0; } int aq_ring_init(struct aq_ring_s *self, const enum atl_ring_type ring_type) @@ -948,6 +919,19 @@ void aq_ring_free(struct aq_ring_s *self) } } +void aq_ring_hwts_rx_free(struct aq_ring_s *self) +{ + if (!self) + return; + + if (self->dx_ring) { + dma_free_coherent(aq_nic_get_dev(self->aq_nic), + self->size * self->dx_size + AQ_CFG_RXDS_DEF, + self->dx_ring, self->dx_ring_pa); + self->dx_ring = NULL; + } +} + unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) { unsigned int count; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 0a6c34438c1d..d627ace850ff 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -183,14 +183,14 @@ static inline unsigned int aq_ring_avail_dx(struct aq_ring_s *self) self->sw_head - self->sw_tail - 1); } -struct aq_ring_s *aq_ring_tx_alloc(struct aq_ring_s *self, - struct aq_nic_s *aq_nic, - unsigned int idx, - struct aq_nic_cfg_s *aq_nic_cfg); -struct aq_ring_s *aq_ring_rx_alloc(struct aq_ring_s *self, - struct aq_nic_s *aq_nic, - unsigned int idx, - struct aq_nic_cfg_s *aq_nic_cfg); +int aq_ring_tx_alloc(struct aq_ring_s *self, + struct aq_nic_s *aq_nic, + unsigned int idx, + struct aq_nic_cfg_s *aq_nic_cfg); +int aq_ring_rx_alloc(struct aq_ring_s *self, + struct aq_nic_s *aq_nic, + unsigned int idx, + struct aq_nic_cfg_s *aq_nic_cfg); int aq_ring_init(struct aq_ring_s *self, const enum atl_ring_type ring_type); void aq_ring_rx_deinit(struct aq_ring_s *self); @@ -207,9 +207,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, int budget); int aq_ring_rx_fill(struct aq_ring_s *self); -struct aq_ring_s *aq_ring_hwts_rx_alloc(struct aq_ring_s *self, - struct aq_nic_s *aq_nic, unsigned int idx, - unsigned int size, unsigned int dx_size); +int aq_ring_hwts_rx_alloc(struct aq_ring_s *self, + struct aq_nic_s *aq_nic, unsigned int idx, + unsigned int size, unsigned int dx_size); +void aq_ring_hwts_rx_free(struct aq_ring_s *self); void aq_ring_hwts_rx_clean(struct aq_ring_s *self, struct aq_nic_s *aq_nic); unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index f5db1c44e9b9..9769ab4f9bef 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -136,35 +136,32 @@ int aq_vec_ring_alloc(struct aq_vec_s *self, struct aq_nic_s *aq_nic, const unsigned int idx_ring = AQ_NIC_CFG_TCVEC2RING(aq_nic_cfg, i, idx); - ring = aq_ring_tx_alloc(&self->ring[i][AQ_VEC_TX_ID], aq_nic, - idx_ring, aq_nic_cfg); - if (!ring) { - err = -ENOMEM; + ring = &self->ring[i][AQ_VEC_TX_ID]; + err = aq_ring_tx_alloc(ring, aq_nic, idx_ring, aq_nic_cfg); + if (err) goto err_exit; - } ++self->tx_rings; aq_nic_set_tx_ring(aq_nic, idx_ring, ring); - if (xdp_rxq_info_reg(&self->ring[i][AQ_VEC_RX_ID].xdp_rxq, + ring = &self->ring[i][AQ_VEC_RX_ID]; + if (xdp_rxq_info_reg(&ring->xdp_rxq, aq_nic->ndev, idx, self->napi.napi_id) < 0) { err = -ENOMEM; goto err_exit; } - if (xdp_rxq_info_reg_mem_model(&self->ring[i][AQ_VEC_RX_ID].xdp_rxq, + if (xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL) < 0) { - xdp_rxq_info_unreg(&self->ring[i][AQ_VEC_RX_ID].xdp_rxq); + xdp_rxq_info_unreg(&ring->xdp_rxq); err = -ENOMEM; goto err_exit; } - ring = aq_ring_rx_alloc(&self->ring[i][AQ_VEC_RX_ID], aq_nic, - idx_ring, aq_nic_cfg); - if (!ring) { - xdp_rxq_info_unreg(&self->ring[i][AQ_VEC_RX_ID].xdp_rxq); - err = -ENOMEM; + err = aq_ring_rx_alloc(ring, aq_nic, idx_ring, aq_nic_cfg); + if (err) { + xdp_rxq_info_unreg(&ring->xdp_rxq); goto err_exit; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index d8b1824c334d..0bc1367fd649 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -1002,9 +1002,6 @@ static inline void bnx2x_set_fw_mac_addr(__le16 *fw_hi, __le16 *fw_mid, static inline void bnx2x_free_rx_mem_pool(struct bnx2x *bp, struct bnx2x_alloc_pool *pool) { - if (!pool->page) - return; - put_page(pool->page); pool->page = NULL; @@ -1015,6 +1012,9 @@ static inline void bnx2x_free_rx_sge_range(struct bnx2x *bp, { int i; + if (!fp->page_pool.page) + return; + if (fp->mode == TPA_MODE_DISABLED) return; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index df4d88d35701..f810b5dc25f0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12269,6 +12269,11 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp) bp->fw_cap = 0; rc = bnxt_hwrm_ver_get(bp); + /* FW may be unresponsive after FLR. FLR must complete within 100 msec + * so wait before continuing with recovery. + */ + if (rc) + msleep(100); bnxt_try_map_fw_health_reg(bp); if (rc) { rc = bnxt_try_recover_fw(bp); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 6d1b76002282..97d12c7eea77 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1917,6 +1917,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) /* if any of the above changed restart the FEC */ if (status_change) { + netif_stop_queue(ndev); napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); @@ -1926,6 +1927,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) } } else { if (fep->link) { + netif_stop_queue(ndev); napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_stop(ndev); diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 588d64819ed5..e84e944d751d 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -350,6 +350,7 @@ static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, u32 *desc_idx, /* Validates and prepares `skb` for TSO. * * Returns header length, or < 0 if invalid. + * Warning : Might change skb->head (and thus skb_shinfo). */ static int gve_prep_tso(struct sk_buff *skb) { @@ -451,8 +452,8 @@ gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc, static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, struct sk_buff *skb) { - const struct skb_shared_info *shinfo = skb_shinfo(skb); const bool is_gso = skb_is_gso(skb); + struct skb_shared_info *shinfo; u32 desc_idx = tx->dqo_tx.tail; struct gve_tx_pending_packet_dqo *pkt; @@ -477,6 +478,8 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, desc_idx = (desc_idx + 1) & tx->mask; } + /* Must get after gve_prep_tso(), which can change shinfo. */ + shinfo = skb_shinfo(skb); gve_tx_fill_general_ctx_desc(&tx->dqo.tx_ring[desc_idx].general_ctx, &metadata); desc_idx = (desc_idx + 1) & tx->mask; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c index 3b6dbf158b98..f72dc0cee30e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c @@ -76,7 +76,7 @@ static int hns3_dcbnl_ieee_delapp(struct net_device *ndev, struct dcb_app *app) if (hns3_nic_resetting(ndev)) return -EBUSY; - if (h->kinfo.dcb_ops->ieee_setapp) + if (h->kinfo.dcb_ops->ieee_delapp) return h->kinfo.dcb_ops->ieee_delapp(h, app); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 48b0cb5ec5d2..27037ce79590 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2990,7 +2990,10 @@ static int hclge_mac_init(struct hclge_dev *hdev) int ret; hdev->support_sfp_query = true; - hdev->hw.mac.duplex = HCLGE_MAC_FULL; + + if (!test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + hdev->hw.mac.duplex = HCLGE_MAC_FULL; + ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed, hdev->hw.mac.duplex, hdev->hw.mac.lane_num); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index a40b1583f114..0f06f95b09bc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -120,7 +120,7 @@ void hclge_ptp_get_rx_hwts(struct hnae3_handle *handle, struct sk_buff *skb, u64 ns = nsec; u32 sec_h; - if (!test_bit(HCLGE_PTP_FLAG_RX_EN, &hdev->ptp->flags)) + if (!hdev->ptp || !test_bit(HCLGE_PTP_FLAG_RX_EN, &hdev->ptp->flags)) return; /* Since the BD does not have enough space for the higher 16 bits of diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h index 8510b88d4982..f3cd5a376eca 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h @@ -24,7 +24,7 @@ TRACE_EVENT(hclge_pf_mbx_get, __field(u8, code) __field(u8, subcode) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->vport[0].nic.kinfo.netdev->name) + __string(devname, hdev->vport[0].nic.kinfo.netdev->name) __array(u32, mbx_data, PF_GET_MBX_LEN) ), @@ -33,7 +33,7 @@ TRACE_EVENT(hclge_pf_mbx_get, __entry->code = req->msg.code; __entry->subcode = req->msg.subcode; __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->vport[0].nic.kinfo.netdev->name); + __assign_str(devname, hdev->vport[0].nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_vf_to_pf_cmd)); ), @@ -56,7 +56,7 @@ TRACE_EVENT(hclge_pf_mbx_send, __field(u8, vfid) __field(u16, code) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->vport[0].nic.kinfo.netdev->name) + __string(devname, hdev->vport[0].nic.kinfo.netdev->name) __array(u32, mbx_data, PF_SEND_MBX_LEN) ), @@ -64,7 +64,7 @@ TRACE_EVENT(hclge_pf_mbx_send, __entry->vfid = req->dest_vfid; __entry->code = le16_to_cpu(req->msg.code); __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->vport[0].nic.kinfo.netdev->name); + __assign_str(devname, hdev->vport[0].nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_pf_to_vf_cmd)); ), diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h index 5d4895bb57a1..b259e95dd53c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h @@ -23,7 +23,7 @@ TRACE_EVENT(hclge_vf_mbx_get, __field(u8, vfid) __field(u16, code) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->nic.kinfo.netdev->name) + __string(devname, hdev->nic.kinfo.netdev->name) __array(u32, mbx_data, VF_GET_MBX_LEN) ), @@ -31,7 +31,7 @@ TRACE_EVENT(hclge_vf_mbx_get, __entry->vfid = req->dest_vfid; __entry->code = le16_to_cpu(req->msg.code); __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->nic.kinfo.netdev->name); + __assign_str(devname, hdev->nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_pf_to_vf_cmd)); ), @@ -55,7 +55,7 @@ TRACE_EVENT(hclge_vf_mbx_send, __field(u8, code) __field(u8, subcode) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->nic.kinfo.netdev->name) + __string(devname, hdev->nic.kinfo.netdev->name) __array(u32, mbx_data, VF_SEND_MBX_LEN) ), @@ -64,7 +64,7 @@ TRACE_EVENT(hclge_vf_mbx_send, __entry->code = req->msg.code; __entry->subcode = req->msg.subcode; __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->nic.kinfo.netdev->name); + __assign_str(devname, hdev->nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_vf_to_pf_cmd)); ), diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 63d43ef86f9b..d8a7fb21b7b7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5333,7 +5333,7 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf) { int v, ret = 0; - for (v = 0; v < pf->hw.func_caps.num_vsis; v++) { + for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v]) { ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]); if (ret) @@ -13569,9 +13569,9 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) return err; i40e_queue_pair_disable_irq(vsi, queue_pair); + i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); - i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair); diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index c7d761426d6c..ed4be80fec2a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2603,6 +2603,14 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg) int aq_ret = 0; int i; + if (vf->is_disabled_from_host) { + aq_ret = -EPERM; + dev_info(&pf->pdev->dev, + "Admin has disabled VF %d, will not enable queues\n", + vf->vf_id); + goto error_param; + } + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; @@ -2838,6 +2846,24 @@ error_param: (u8 *)&stats, sizeof(stats)); } +/** + * i40e_can_vf_change_mac + * @vf: pointer to the VF info + * + * Return true if the VF is allowed to change its MAC filters, false otherwise + */ +static bool i40e_can_vf_change_mac(struct i40e_vf *vf) +{ + /* If the VF MAC address has been set administratively (via the + * ndo_set_vf_mac command), then deny permission to the VF to + * add/delete unicast MAC addresses, unless the VF is trusted + */ + if (vf->pf_set_mac && !vf->trusted) + return false; + + return true; +} + #define I40E_MAX_MACVLAN_PER_HW 3072 #define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW / \ (num_ports)) @@ -2897,8 +2923,8 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, * The VF may request to set the MAC address filter already * assigned to it so do not return an error in that case. */ - if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) && - !is_multicast_ether_addr(addr) && vf->pf_set_mac && + if (!i40e_can_vf_change_mac(vf) && + !is_multicast_ether_addr(addr) && !ether_addr_equal(addr, vf->default_lan_addr.addr)) { dev_err(&pf->pdev->dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); @@ -3041,19 +3067,29 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) ret = I40E_ERR_INVALID_MAC_ADDR; goto error_param; } - if (ether_addr_equal(al->list[i].addr, vf->default_lan_addr.addr)) - was_unimac_deleted = true; } vsi = pf->vsi[vf->lan_vsi_idx]; spin_lock_bh(&vsi->mac_filter_hash_lock); /* delete addresses from the list */ - for (i = 0; i < al->num_elements; i++) + for (i = 0; i < al->num_elements; i++) { + const u8 *addr = al->list[i].addr; + + /* Allow to delete VF primary MAC only if it was not set + * administratively by PF or if VF is trusted. + */ + if (ether_addr_equal(addr, vf->default_lan_addr.addr) && + i40e_can_vf_change_mac(vf)) + was_unimac_deleted = true; + else + continue; + if (i40e_del_mac_filter(vsi, al->list[i].addr)) { ret = I40E_ERR_INVALID_MAC_ADDR; spin_unlock_bh(&vsi->mac_filter_hash_lock); goto error_param; } + } spin_unlock_bh(&vsi->mac_filter_hash_lock); @@ -4656,9 +4692,12 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link) struct i40e_link_status *ls = &pf->hw.phy.link_info; struct virtchnl_pf_event pfe; struct i40e_hw *hw = &pf->hw; + struct i40e_vsi *vsi; + unsigned long q_map; struct i40e_vf *vf; int abs_vf_id; int ret = 0; + int tmp; if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) { dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n"); @@ -4681,17 +4720,38 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link) switch (link) { case IFLA_VF_LINK_STATE_AUTO: vf->link_forced = false; + vf->is_disabled_from_host = false; + /* reset needed to reinit VF resources */ + i40e_vc_reset_vf(vf, true); i40e_set_vf_link_state(vf, &pfe, ls); break; case IFLA_VF_LINK_STATE_ENABLE: vf->link_forced = true; vf->link_up = true; + vf->is_disabled_from_host = false; + /* reset needed to reinit VF resources */ + i40e_vc_reset_vf(vf, true); i40e_set_vf_link_state(vf, &pfe, ls); break; case IFLA_VF_LINK_STATE_DISABLE: vf->link_forced = true; vf->link_up = false; i40e_set_vf_link_state(vf, &pfe, ls); + + vsi = pf->vsi[vf->lan_vsi_idx]; + q_map = BIT(vsi->num_queue_pairs) - 1; + + vf->is_disabled_from_host = true; + + /* Try to stop both Tx&Rx rings even if one of the calls fails + * to ensure we stop the rings even in case of errors. + * If any of them returns with an error then the first + * error that occurred will be returned. + */ + tmp = i40e_ctrl_vf_tx_rings(vsi, q_map, false); + ret = i40e_ctrl_vf_rx_rings(vsi, q_map, false); + + ret = tmp ? tmp : ret; break; default: ret = -EINVAL; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index bd497cc5303a..97e9c34d7c6c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -98,6 +98,7 @@ struct i40e_vf { bool link_forced; bool link_up; /* only valid if VF link is forced */ bool spoofchk; + bool is_disabled_from_host; /* PF ctrl of VF enable/disable */ u16 num_vlan; /* ADq related variables */ diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 1bdc70aa979d..fe48164dce1e 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -413,10 +413,10 @@ struct ice_aqc_vsi_props { #define ICE_AQ_VSI_INNER_VLAN_INSERT_PVID BIT(2) #define ICE_AQ_VSI_INNER_VLAN_EMODE_S 3 #define ICE_AQ_VSI_INNER_VLAN_EMODE_M (0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) -#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH (0x0 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) -#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_UP (0x1 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) -#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR (0x2 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) -#define ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING (0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) +#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH 0x0U +#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_UP 0x1U +#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR 0x2U +#define ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING 0x3U u8 inner_vlan_reserved2[3]; /* ingress egress up sections */ __le32 ingress_table; /* bitmap, 3 bits per up */ @@ -482,11 +482,11 @@ struct ice_aqc_vsi_props { #define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S 2 #define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M (0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S) #define ICE_AQ_VSI_Q_OPT_RSS_HASH_S 6 -#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) -#define ICE_AQ_VSI_Q_OPT_RSS_TPLZ (0x0 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) -#define ICE_AQ_VSI_Q_OPT_RSS_SYM_TPLZ (0x1 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) -#define ICE_AQ_VSI_Q_OPT_RSS_XOR (0x2 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) -#define ICE_AQ_VSI_Q_OPT_RSS_JHASH (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M GENMASK(7, 6) +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ 0x0U +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ 0x1U +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR 0x2U +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_JHASH 0x3U u8 q_opt_tc; #define ICE_AQ_VSI_Q_OPT_TC_OVR_S 0 #define ICE_AQ_VSI_Q_OPT_TC_OVR_M (0x1F << ICE_AQ_VSI_Q_OPT_TC_OVR_S) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index c051503c3a89..cc6c04a69b28 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -878,7 +878,8 @@ static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt) */ if (ice_is_dvm_ena(hw)) { ctxt->info.inner_vlan_flags |= - ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING; + FIELD_PREP(ICE_AQ_VSI_INNER_VLAN_EMODE_M, + ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING); ctxt->info.outer_vlan_flags = (ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL << ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) & @@ -1085,12 +1086,12 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) case ICE_VSI_PF: /* PF VSI will inherit RSS instance of PF */ lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF; - hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; + hash_type = ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; break; case ICE_VSI_VF: /* VF VSI will gets a small RSS table which is a VSI LUT type */ lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; - hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; + hash_type = ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; break; default: dev_dbg(dev, "Unsupported VSI type %s\n", diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ab46cfca4028..3117f65253b3 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7681,6 +7681,8 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, pf_sw = pf->first_sw; /* find the attribute in the netlink message */ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { __u16 mode; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 2b4c791b6cba..4b71392f60df 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -440,7 +440,6 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vf->driver_caps = *(u32 *)msg; else vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 | - VIRTCHNL_VF_OFFLOAD_RSS_REG | VIRTCHNL_VF_OFFLOAD_VLAN; vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2; @@ -453,14 +452,8 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vf_cap_flags |= ice_vc_get_vlan_caps(hw, vf, vsi, vf->driver_caps); - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF; - } else { - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ) - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ; - else - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG; - } if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_FDIR_PF; @@ -817,8 +810,8 @@ static int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) int status; lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; - hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_XOR : - ICE_AQ_VSI_Q_OPT_RSS_TPLZ; + hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR : + ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) { @@ -826,11 +819,9 @@ static int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) goto error_param; } - ctx->info.q_opt_rss = ((lut_type << - ICE_AQ_VSI_Q_OPT_RSS_LUT_S) & - ICE_AQ_VSI_Q_OPT_RSS_LUT_M) | - (hash_type & - ICE_AQ_VSI_Q_OPT_RSS_HASH_M); + ctx->info.q_opt_rss = + FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_LUT_M, lut_type) | + FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hash_type); /* Preserve existing queueing option setting */ ctx->info.q_opt_rss |= (vsi->info.q_opt_rss & diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index 5a82216e7d03..63e83e8b97e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -13,8 +13,6 @@ * - opcodes needed by VF when caps are activated * * Caps that don't use new opcodes (no opcodes should be allowed): - * - VIRTCHNL_VF_OFFLOAD_RSS_AQ - * - VIRTCHNL_VF_OFFLOAD_RSS_REG * - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR * - VIRTCHNL_VF_OFFLOAD_CRC * - VIRTCHNL_VF_OFFLOAD_RX_POLLING diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c index 5b4a0abb4607..239266e9d5f1 100644 --- a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c @@ -131,6 +131,7 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) { struct ice_hw *hw = &vsi->back->hw; struct ice_vsi_ctx *ctxt; + u8 *ivf; int err; /* do not allow modifying VLAN stripping when a port VLAN is configured @@ -143,19 +144,24 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) if (!ctxt) return -ENOMEM; + ivf = &ctxt->info.inner_vlan_flags; + /* Here we are configuring what the VSI should do with the VLAN tag in * the Rx packet. We can either leave the tag in the packet or put it in * the Rx descriptor. */ - if (ena) + if (ena) { /* Strip VLAN tag from Rx packet and put it in the desc */ - ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH; - else + *ivf = FIELD_PREP(ICE_AQ_VSI_INNER_VLAN_EMODE_M, + ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH); + } else { /* Disable stripping. Leave tag in packet */ - ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING; + *ivf = FIELD_PREP(ICE_AQ_VSI_INNER_VLAN_EMODE_M, + ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING); + } /* Allow all packets untagged/tagged */ - ctxt->info.inner_vlan_flags |= ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL; + *ivf |= ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL; ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 41ee081eb887..48cf24709fe3 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -171,6 +171,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) return -EBUSY; usleep_range(1000, 2000); } + + ice_qvec_dis_irq(vsi, rx_ring, q_vector); + ice_qvec_toggle_napi(vsi, q_vector, false); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); ice_fill_txq_meta(vsi, tx_ring, &txq_meta); @@ -187,13 +191,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) if (err) return err; } - ice_qvec_dis_irq(vsi, rx_ring, q_vector); - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); if (err) return err; - ice_qvec_toggle_napi(vsi, q_vector, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); @@ -256,11 +257,11 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; - clear_bit(ICE_CFG_BUSY, vsi->state); ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + clear_bit(ICE_CFG_BUSY, vsi->state); free_buf: kfree(qg_buf); return err; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 45ce4ed16146..81d9a5338be5 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6926,44 +6926,31 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) static void igb_tsync_interrupt(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - u32 ack = 0, tsicr = rd32(E1000_TSICR); + u32 tsicr = rd32(E1000_TSICR); struct ptp_clock_event event; if (tsicr & TSINTR_SYS_WRAP) { event.type = PTP_CLOCK_PPS; if (adapter->ptp_caps.pps) ptp_clock_event(adapter->ptp_clock, &event); - ack |= TSINTR_SYS_WRAP; } if (tsicr & E1000_TSICR_TXTS) { /* retrieve hardware timestamp */ schedule_work(&adapter->ptp_tx_work); - ack |= E1000_TSICR_TXTS; } - if (tsicr & TSINTR_TT0) { + if (tsicr & TSINTR_TT0) igb_perout(adapter, 0); - ack |= TSINTR_TT0; - } - if (tsicr & TSINTR_TT1) { + if (tsicr & TSINTR_TT1) igb_perout(adapter, 1); - ack |= TSINTR_TT1; - } - if (tsicr & TSINTR_AUTT0) { + if (tsicr & TSINTR_AUTT0) igb_extts(adapter, 0); - ack |= TSINTR_AUTT0; - } - if (tsicr & TSINTR_AUTT1) { + if (tsicr & TSINTR_AUTT1) igb_extts(adapter, 1); - ack |= TSINTR_AUTT1; - } - - /* acknowledge the interrupts */ - wr32(E1000_TSICR, ack); } static irqreturn_t igb_msix_other(int irq, void *data) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 07171e574e7d..36e62197fba0 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -976,7 +976,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); /* adjust timestamp for the TX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_TX_LATENCY_10; @@ -1022,6 +1022,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, ktime_t *timestamp) { struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; struct skb_shared_hwtstamps ts; __le64 *regval = (__le64 *)va; int adjust = 0; @@ -1041,7 +1042,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1])); /* adjust timestamp for the RX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_RX_LATENCY_10; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 4b6f882b380d..e052f49cc08d 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6330,7 +6330,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, int cpu = smp_processor_id(); struct netdev_queue *nq; struct igc_ring *ring; - int i, drops; + int i, nxmit; if (unlikely(!netif_carrier_ok(dev))) return -ENETDOWN; @@ -6346,16 +6346,15 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, /* Avoid transmit queue timeout since we share it with the slow path */ txq_trans_cond_update(nq); - drops = 0; + nxmit = 0; for (i = 0; i < num_frames; i++) { int err; struct xdp_frame *xdpf = frames[i]; err = igc_xdp_init_tx_descriptor(ring, xdpf); - if (err) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err) + break; + nxmit++; } if (flags & XDP_XMIT_FLUSH) @@ -6363,7 +6362,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, __netif_tx_unlock(nq); - return num_frames - drops; + return nxmit; } static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 100388968e4d..3d56481e16bc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -123,14 +123,14 @@ static s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw) if (ret_val) return ret_val; if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; /* Check to see if SFP+ module is supported */ ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset, &data_offset); if (ret_val) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; break; default: break; @@ -213,7 +213,7 @@ static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw, break; default: - return IXGBE_ERR_LINK_SETUP; + return -EIO; } return 0; @@ -283,7 +283,7 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) /* Validate the water mark configuration */ if (!hw->fc.pause_time) - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; /* Low water mark of zero causes XOFF floods */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { @@ -292,7 +292,7 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) if (!hw->fc.low_water[i] || hw->fc.low_water[i] >= hw->fc.high_water[i]) { hw_dbg(hw, "Invalid water mark configuration\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } } } @@ -369,7 +369,7 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) break; default: hw_dbg(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } /* Set 802.3x based flow control settings. */ @@ -438,7 +438,7 @@ static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw, msleep(100); } if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { - status = IXGBE_ERR_AUTONEG_NOT_COMPLETE; + status = -EIO; hw_dbg(hw, "Autonegotiation did not complete.\n"); } } @@ -478,7 +478,7 @@ static s32 ixgbe_validate_link_ready(struct ixgbe_hw *hw) if (timeout == IXGBE_VALIDATE_LINK_READY_TIMEOUT) { hw_dbg(hw, "Link was indicated but link is down\n"); - return IXGBE_ERR_LINK_SETUP; + return -EIO; } return 0; @@ -594,7 +594,7 @@ static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw, speed &= link_capabilities; if (speed == IXGBE_LINK_SPEED_UNKNOWN) - return IXGBE_ERR_LINK_SETUP; + return -EINVAL; /* Set KX4/KX support according to speed requested */ else if (link_mode == IXGBE_AUTOC_LMS_KX4_AN || @@ -701,9 +701,9 @@ static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw) /* Init PHY and function pointers, perform SFP setup */ phy_status = hw->phy.ops.init(hw); - if (phy_status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (phy_status == -EOPNOTSUPP) return phy_status; - if (phy_status == IXGBE_ERR_SFP_NOT_PRESENT) + if (phy_status == -ENOENT) goto mac_reset_top; hw->phy.ops.reset(hw); @@ -727,7 +727,7 @@ mac_reset_top: udelay(1); } if (ctrl & IXGBE_CTRL_RST) { - status = IXGBE_ERR_RESET_FAILED; + status = -EIO; hw_dbg(hw, "Reset polling failed to complete.\n"); } @@ -789,7 +789,7 @@ static s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) /* Make sure we are using a valid rar index range */ if (rar >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); @@ -814,7 +814,7 @@ static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) /* Make sure we are using a valid rar index range */ if (rar >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); @@ -845,7 +845,7 @@ static s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, u32 vftabyte; if (vlan > 4095) - return IXGBE_ERR_PARAM; + return -EINVAL; /* Determine 32-bit word position in array */ regindex = (vlan >> 5) & 0x7F; /* upper seven bits */ @@ -964,7 +964,7 @@ static s32 ixgbe_read_i2c_phy_82598(struct ixgbe_hw *hw, u8 dev_addr, gssr = IXGBE_GSSR_PHY0_SM; if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; if (hw->phy.type == ixgbe_phy_nl) { /* @@ -993,7 +993,7 @@ static s32 ixgbe_read_i2c_phy_82598(struct ixgbe_hw *hw, u8 dev_addr, if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_PASS) { hw_dbg(hw, "EEPROM read did not pass.\n"); - status = IXGBE_ERR_SFP_NOT_PRESENT; + status = -ENOENT; goto out; } @@ -1003,7 +1003,7 @@ static s32 ixgbe_read_i2c_phy_82598(struct ixgbe_hw *hw, u8 dev_addr, *eeprom_data = (u8)(sfp_data >> 8); } else { - status = IXGBE_ERR_PHY; + status = -EIO; } out: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 58ea959a4482..339e106a5732 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -117,7 +117,7 @@ static s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw) ret_val = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); if (ret_val) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; if (hw->eeprom.ops.read(hw, ++data_offset, &data_value)) goto setup_sfp_err; @@ -144,7 +144,7 @@ static s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw) if (ret_val) { hw_dbg(hw, " sfp module setup not complete\n"); - return IXGBE_ERR_SFP_SETUP_NOT_COMPLETE; + return -EIO; } } @@ -159,7 +159,7 @@ setup_sfp_err: usleep_range(hw->eeprom.semaphore_delay * 1000, hw->eeprom.semaphore_delay * 2000); hw_err(hw, "eeprom read at offset %d failed\n", data_offset); - return IXGBE_ERR_SFP_SETUP_NOT_COMPLETE; + return -EIO; } /** @@ -184,7 +184,7 @@ static s32 prot_autoc_read_82599(struct ixgbe_hw *hw, bool *locked, ret_val = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); if (ret_val) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; *locked = true; } @@ -219,7 +219,7 @@ static s32 prot_autoc_write_82599(struct ixgbe_hw *hw, u32 autoc, bool locked) ret_val = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); if (ret_val) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; locked = true; } @@ -400,7 +400,7 @@ static s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, break; default: - return IXGBE_ERR_LINK_SETUP; + return -EIO; } if (hw->phy.multispeed_fiber) { @@ -541,7 +541,7 @@ static s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, msleep(100); } if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { - status = IXGBE_ERR_AUTONEG_NOT_COMPLETE; + status = -EIO; hw_dbg(hw, "Autoneg did not complete.\n"); } } @@ -794,7 +794,7 @@ static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, speed &= link_capabilities; if (speed == IXGBE_LINK_SPEED_UNKNOWN) - return IXGBE_ERR_LINK_SETUP; + return -EINVAL; /* Use stored value (EEPROM defaults) of AUTOC to find KR/KX4 support*/ if (hw->mac.orig_link_settings_stored) @@ -861,8 +861,7 @@ static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, msleep(100); } if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { - status = - IXGBE_ERR_AUTONEG_NOT_COMPLETE; + status = -EIO; hw_dbg(hw, "Autoneg did not complete.\n"); } } @@ -927,7 +926,7 @@ static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) /* Identify PHY and related function pointers */ status = hw->phy.ops.init(hw); - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (status == -EOPNOTSUPP) return status; /* Setup SFP module if there is one present. */ @@ -936,7 +935,7 @@ static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) hw->phy.sfp_setup_needed = false; } - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (status == -EOPNOTSUPP) return status; /* Reset PHY */ @@ -974,7 +973,7 @@ mac_reset_top: } if (ctrl & IXGBE_CTRL_RST_MASK) { - status = IXGBE_ERR_RESET_FAILED; + status = -EIO; hw_dbg(hw, "Reset polling failed to complete.\n"); } @@ -1093,7 +1092,7 @@ static s32 ixgbe_fdir_check_cmd_complete(struct ixgbe_hw *hw, u32 *fdircmd) udelay(10); } - return IXGBE_ERR_FDIR_CMD_INCOMPLETE; + return -EIO; } /** @@ -1155,7 +1154,7 @@ s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw) } if (i >= IXGBE_FDIR_INIT_DONE_POLL) { hw_dbg(hw, "Flow Director Signature poll time exceeded!\n"); - return IXGBE_ERR_FDIR_REINIT_FAILED; + return -EIO; } /* Clear FDIR statistics registers (read to clear) */ @@ -1387,7 +1386,7 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, break; default: hw_dbg(hw, " Error on flow type input\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } /* configure FDIRCMD register */ @@ -1546,7 +1545,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, break; default: hw_dbg(hw, " Error on vm pool mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } switch (input_mask->formatted.flow_type & IXGBE_ATR_L4TYPE_MASK) { @@ -1555,14 +1554,14 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, if (input_mask->formatted.dst_port || input_mask->formatted.src_port) { hw_dbg(hw, " Error on src/dst port mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } break; case IXGBE_ATR_L4TYPE_MASK: break; default: hw_dbg(hw, " Error on flow type mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } switch (ntohs(input_mask->formatted.vlan_id) & 0xEFFF) { @@ -1583,7 +1582,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, break; default: hw_dbg(hw, " Error on VLAN mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } switch ((__force u16)input_mask->formatted.flex_bytes & 0xFFFF) { @@ -1595,7 +1594,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, break; default: hw_dbg(hw, " Error on flexible byte mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } /* Now mask VM pool and destination IPv6 - bits 5 and 2 */ @@ -1824,7 +1823,7 @@ static s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw) /* Return error if SFP module has been detected but is not supported */ if (hw->phy.type == ixgbe_phy_sfp_unsupported) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; return status; } @@ -1863,13 +1862,13 @@ static s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval) * Verifies that installed the firmware version is 0.6 or higher * for SFI devices. All 82599 SFI devices should have version 0.6 or higher. * - * Returns IXGBE_ERR_EEPROM_VERSION if the FW is not present or - * if the FW version is not supported. + * Return: -EACCES if the FW is not present or if the FW version is + * not supported. **/ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) { - s32 status = IXGBE_ERR_EEPROM_VERSION; u16 fw_offset, fw_ptp_cfg_offset; + s32 status = -EACCES; u16 offset; u16 fw_version = 0; @@ -1883,7 +1882,7 @@ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) goto fw_version_err; if (fw_offset == 0 || fw_offset == 0xFFFF) - return IXGBE_ERR_EEPROM_VERSION; + return -EACCES; /* get the offset to the Pass Through Patch Configuration block */ offset = fw_offset + IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR; @@ -1891,7 +1890,7 @@ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) goto fw_version_err; if (fw_ptp_cfg_offset == 0 || fw_ptp_cfg_offset == 0xFFFF) - return IXGBE_ERR_EEPROM_VERSION; + return -EACCES; /* get the firmware version */ offset = fw_ptp_cfg_offset + IXGBE_FW_PATCH_VERSION_4; @@ -1905,7 +1904,7 @@ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) fw_version_err: hw_err(hw, "eeprom read at offset %d failed\n", offset); - return IXGBE_ERR_EEPROM_VERSION; + return -EACCES; } /** @@ -2038,7 +2037,7 @@ static s32 ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw) if (!(anlp1_reg & IXGBE_ANLP1_AN_STATE_MASK)) { hw_dbg(hw, "auto negotiation not completed\n"); - ret_val = IXGBE_ERR_RESET_FAILED; + ret_val = -EIO; goto reset_pipeline_out; } @@ -2087,7 +2086,7 @@ static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, if (!timeout) { hw_dbg(hw, "Driver can't access resource, acquiring I2C bus timeout.\n"); - status = IXGBE_ERR_I2C; + status = -EIO; goto release_i2c_access; } } @@ -2141,7 +2140,7 @@ static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, if (!timeout) { hw_dbg(hw, "Driver can't access resource, acquiring I2C bus timeout.\n"); - status = IXGBE_ERR_I2C; + status = -EIO; goto release_i2c_access; } } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 38c4609bd429..5688a6ad4b3b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -124,7 +124,7 @@ s32 ixgbe_setup_fc_generic(struct ixgbe_hw *hw) */ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { hw_dbg(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } /* @@ -215,7 +215,7 @@ s32 ixgbe_setup_fc_generic(struct ixgbe_hw *hw) break; default: hw_dbg(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } if (hw->mac.type != ixgbe_mac_X540) { @@ -500,7 +500,7 @@ s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, if (pba_num == NULL) { hw_dbg(hw, "PBA string buffer was null\n"); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM0_PTR, &data); @@ -526,7 +526,7 @@ s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, /* we will need 11 characters to store the PBA */ if (pba_num_size < 11) { hw_dbg(hw, "PBA string buffer too small\n"); - return IXGBE_ERR_NO_SPACE; + return -ENOSPC; } /* extract hex string from data and pba_ptr */ @@ -563,13 +563,13 @@ s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, if (length == 0xFFFF || length == 0) { hw_dbg(hw, "NVM PBA number section invalid length\n"); - return IXGBE_ERR_PBA_SECTION; + return -EIO; } /* check if pba_num buffer is big enough */ if (pba_num_size < (((u32)length * 2) - 1)) { hw_dbg(hw, "PBA string buffer too small\n"); - return IXGBE_ERR_NO_SPACE; + return -ENOSPC; } /* trim pba length from start of string */ @@ -805,7 +805,7 @@ s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index) u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* To turn on the LED, set mode to ON. */ led_reg &= ~IXGBE_LED_MODE_MASK(index); @@ -826,7 +826,7 @@ s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index) u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* To turn off the LED, set mode to OFF. */ led_reg &= ~IXGBE_LED_MODE_MASK(index); @@ -904,11 +904,8 @@ s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); - if (words == 0) - return IXGBE_ERR_INVALID_ARGUMENT; - - if (offset + words > hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + if (words == 0 || (offset + words > hw->eeprom.word_size)) + return -EINVAL; /* * The EEPROM page size cannot be queried from the chip. We do lazy @@ -962,7 +959,7 @@ static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, if (ixgbe_ready_eeprom(hw) != 0) { ixgbe_release_eeprom(hw); - return IXGBE_ERR_EEPROM; + return -EIO; } for (i = 0; i < words; i++) { @@ -1028,7 +1025,7 @@ s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data) hw->eeprom.ops.init_params(hw); if (offset >= hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + return -EINVAL; return ixgbe_write_eeprom_buffer_bit_bang(hw, offset, 1, &data); } @@ -1050,11 +1047,8 @@ s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); - if (words == 0) - return IXGBE_ERR_INVALID_ARGUMENT; - - if (offset + words > hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + if (words == 0 || (offset + words > hw->eeprom.word_size)) + return -EINVAL; /* * We cannot hold synchronization semaphores for too long @@ -1099,7 +1093,7 @@ static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, if (ixgbe_ready_eeprom(hw) != 0) { ixgbe_release_eeprom(hw); - return IXGBE_ERR_EEPROM; + return -EIO; } for (i = 0; i < words; i++) { @@ -1142,7 +1136,7 @@ s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); if (offset >= hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + return -EINVAL; return ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data); } @@ -1165,11 +1159,8 @@ s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); - if (words == 0) - return IXGBE_ERR_INVALID_ARGUMENT; - - if (offset >= hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + if (words == 0 || offset >= hw->eeprom.word_size) + return -EINVAL; for (i = 0; i < words; i++) { eerd = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) | @@ -1262,11 +1253,8 @@ s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); - if (words == 0) - return IXGBE_ERR_INVALID_ARGUMENT; - - if (offset >= hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + if (words == 0 || offset >= hw->eeprom.word_size) + return -EINVAL; for (i = 0; i < words; i++) { eewr = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) | @@ -1328,7 +1316,7 @@ static s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg) } udelay(5); } - return IXGBE_ERR_EEPROM; + return -EIO; } /** @@ -1344,7 +1332,7 @@ static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw) u32 i; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) != 0) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw)); @@ -1366,7 +1354,7 @@ static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw) hw_dbg(hw, "Could not acquire EEPROM grant\n"); hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); - return IXGBE_ERR_EEPROM; + return -EIO; } /* Setup EEPROM for Read/Write */ @@ -1419,7 +1407,7 @@ static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw) swsm = IXGBE_READ_REG(hw, IXGBE_SWSM(hw)); if (swsm & IXGBE_SWSM_SMBI) { hw_dbg(hw, "Software semaphore SMBI between device drivers not granted.\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } } @@ -1447,7 +1435,7 @@ static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw) if (i >= timeout) { hw_dbg(hw, "SWESMBI Software EEPROM semaphore not granted.\n"); ixgbe_release_eeprom_semaphore(hw); - return IXGBE_ERR_EEPROM; + return -EIO; } return 0; @@ -1503,7 +1491,7 @@ static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw) */ if (i >= IXGBE_EEPROM_MAX_RETRY_SPI) { hw_dbg(hw, "SPI EEPROM Status error\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } return 0; @@ -1715,7 +1703,7 @@ s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw) for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) { if (hw->eeprom.ops.read(hw, i, &pointer)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } /* If the pointer seems invalid */ @@ -1724,7 +1712,7 @@ s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw) if (hw->eeprom.ops.read(hw, pointer, &length)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } if (length == 0xFFFF || length == 0) @@ -1733,7 +1721,7 @@ s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw) for (j = pointer + 1; j <= pointer + length; j++) { if (hw->eeprom.ops.read(hw, j, &word)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } checksum += word; } @@ -1786,7 +1774,7 @@ s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw, * calculated checksum */ if (read_checksum != checksum) - status = IXGBE_ERR_EEPROM_CHECKSUM; + status = -EIO; /* If the user cares, return the calculated checksum */ if (checksum_val) @@ -1845,7 +1833,7 @@ s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, /* Make sure we are using a valid rar index range */ if (index >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", index); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } /* setup VMDq pool selection before this RAR gets enabled */ @@ -1897,7 +1885,7 @@ s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index) /* Make sure we are using a valid rar index range */ if (index >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", index); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } /* @@ -2146,7 +2134,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) /* Validate the water mark configuration. */ if (!hw->fc.pause_time) - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; /* Low water mark of zero causes XOFF floods */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { @@ -2155,7 +2143,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) if (!hw->fc.low_water[i] || hw->fc.low_water[i] >= hw->fc.high_water[i]) { hw_dbg(hw, "Invalid water mark configuration\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } } } @@ -2212,7 +2200,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) break; default: hw_dbg(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } /* Set 802.3x based flow control settings. */ @@ -2269,7 +2257,7 @@ s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) { if ((!(adv_reg)) || (!(lp_reg))) - return IXGBE_ERR_FC_NOT_NEGOTIATED; + return -EINVAL; if ((adv_reg & adv_sym) && (lp_reg & lp_sym)) { /* @@ -2321,7 +2309,7 @@ static s32 ixgbe_fc_autoneg_fiber(struct ixgbe_hw *hw) linkstat = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA); if ((!!(linkstat & IXGBE_PCS1GLSTA_AN_COMPLETE) == 0) || (!!(linkstat & IXGBE_PCS1GLSTA_AN_TIMED_OUT) == 1)) - return IXGBE_ERR_FC_NOT_NEGOTIATED; + return -EIO; pcs_anadv_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA); pcs_lpab_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP); @@ -2353,12 +2341,12 @@ static s32 ixgbe_fc_autoneg_backplane(struct ixgbe_hw *hw) */ links = IXGBE_READ_REG(hw, IXGBE_LINKS); if ((links & IXGBE_LINKS_KX_AN_COMP) == 0) - return IXGBE_ERR_FC_NOT_NEGOTIATED; + return -EIO; if (hw->mac.type == ixgbe_mac_82599EB) { links2 = IXGBE_READ_REG(hw, IXGBE_LINKS2); if ((links2 & IXGBE_LINKS2_AN_SUPPORTED) == 0) - return IXGBE_ERR_FC_NOT_NEGOTIATED; + return -EIO; } /* * Read the 10g AN autoc and LP ability registers and resolve @@ -2407,8 +2395,8 @@ static s32 ixgbe_fc_autoneg_copper(struct ixgbe_hw *hw) **/ void ixgbe_fc_autoneg(struct ixgbe_hw *hw) { - s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED; ixgbe_link_speed speed; + s32 ret_val = -EIO; bool link_up; /* @@ -2510,7 +2498,7 @@ static u32 ixgbe_pcie_timeout_poll(struct ixgbe_hw *hw) * @hw: pointer to hardware structure * * Disables PCI-Express primary access and verifies there are no pending - * requests. IXGBE_ERR_PRIMARY_REQUESTS_PENDING is returned if primary disable + * requests. -EALREADY is returned if primary disable * bit hasn't caused the primary requests to be disabled, else 0 * is returned signifying primary requests disabled. **/ @@ -2575,7 +2563,7 @@ gio_disable_fail: } hw_dbg(hw, "PCIe transaction pending bit also did not clear.\n"); - return IXGBE_ERR_PRIMARY_REQUESTS_PENDING; + return -EALREADY; } /** @@ -2600,7 +2588,7 @@ s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u32 mask) * SW_FW_SYNC bits (not just NVM) */ if (ixgbe_get_eeprom_semaphore(hw)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; gssr = IXGBE_READ_REG(hw, IXGBE_GSSR); if (!(gssr & (fwmask | swmask))) { @@ -2620,7 +2608,7 @@ s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u32 mask) ixgbe_release_swfw_sync(hw, gssr & (fwmask | swmask)); usleep_range(5000, 10000); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } /** @@ -2757,7 +2745,7 @@ s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index) s32 ret_val; if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* * Link must be up to auto-blink the LEDs; @@ -2803,7 +2791,7 @@ s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index) s32 ret_val; if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; ret_val = hw->mac.ops.prot_autoc_read(hw, &locked, &autoc_reg); if (ret_val) @@ -2963,7 +2951,7 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) /* Make sure we are using a valid rar index range */ if (rar >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } mpsar_lo = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar)); @@ -3014,7 +3002,7 @@ s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) /* Make sure we are using a valid rar index range */ if (rar >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } if (vmdq < 32) { @@ -3091,7 +3079,7 @@ static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan, bool vlvf_bypass) * will simply bypass the VLVF if there are no entries present in the * VLVF that contain our VLAN */ - first_empty_slot = vlvf_bypass ? IXGBE_ERR_NO_SPACE : 0; + first_empty_slot = vlvf_bypass ? -ENOSPC : 0; /* add VLAN enable bit for comparison */ vlan |= IXGBE_VLVF_VIEN; @@ -3115,7 +3103,7 @@ static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan, bool vlvf_bypass) if (!first_empty_slot) hw_dbg(hw, "No space in VLVF.\n"); - return first_empty_slot ? : IXGBE_ERR_NO_SPACE; + return first_empty_slot ? : -ENOSPC; } /** @@ -3135,7 +3123,7 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, s32 vlvf_index; if ((vlan > 4095) || (vind > 63)) - return IXGBE_ERR_PARAM; + return -EINVAL; /* * this is a 2 part operation - first the VFTA, then the @@ -3596,7 +3584,8 @@ u8 ixgbe_calculate_checksum(u8 *buffer, u32 length) * * Communicates with the manageability block. On success return 0 * else returns semaphore error when encountering an error acquiring - * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + * semaphore, -EINVAL when incorrect parameters passed or -EIO when + * command fails. * * This function assumes that the IXGBE_GSSR_SW_MNG_SM semaphore is held * by the caller. @@ -3609,7 +3598,7 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { hw_dbg(hw, "Buffer length failure buffersize-%d.\n", length); - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EINVAL; } /* Set bit 9 of FWSTS clearing FW reset indication */ @@ -3620,13 +3609,13 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, hicr = IXGBE_READ_REG(hw, IXGBE_HICR); if (!(hicr & IXGBE_HICR_EN)) { hw_dbg(hw, "IXGBE_HOST_EN bit disabled.\n"); - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EIO; } /* Calculate length in DWORDs. We must be DWORD aligned */ if (length % sizeof(u32)) { hw_dbg(hw, "Buffer length failure, not aligned to dword"); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } dword_len = length >> 2; @@ -3651,7 +3640,7 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, /* Check command successful completion. */ if ((timeout && i == timeout) || !(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV)) - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EIO; return 0; } @@ -3671,7 +3660,7 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, * in these cases. * * Communicates with the manageability block. On success return 0 - * else return IXGBE_ERR_HOST_INTERFACE_COMMAND. + * else return -EIO or -EINVAL. **/ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer, u32 length, u32 timeout, @@ -3686,7 +3675,7 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer, if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { hw_dbg(hw, "Buffer length failure buffersize-%d.\n", length); - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EINVAL; } /* Take management host interface semaphore */ status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM); @@ -3716,7 +3705,7 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer, if (length < round_up(buf_len, 4) + hdr_size) { hw_dbg(hw, "Buffer not large enough for reply message.\n"); - status = IXGBE_ERR_HOST_INTERFACE_COMMAND; + status = -EIO; goto rel_out; } @@ -3747,8 +3736,8 @@ rel_out: * * Sends driver version number to firmware through the manageability * block. On success return 0 - * else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring - * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + * else returns -EBUSY when encountering an error acquiring + * semaphore or -EIO when command fails. **/ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, u8 sub, __always_unused u16 len, @@ -3784,7 +3773,7 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, FW_CEM_RESP_STATUS_SUCCESS) ret_val = 0; else - ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + ret_val = -EIO; break; } @@ -3882,14 +3871,14 @@ static s32 ixgbe_get_ets_data(struct ixgbe_hw *hw, u16 *ets_cfg, return status; if ((*ets_offset == 0x0000) || (*ets_offset == 0xFFFF)) - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; status = hw->eeprom.ops.read(hw, *ets_offset, ets_cfg); if (status) return status; if ((*ets_cfg & IXGBE_ETS_TYPE_MASK) != IXGBE_ETS_TYPE_EMC_SHIFTED) - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; return 0; } @@ -3912,7 +3901,7 @@ s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw) /* Only support thermal sensors attached to physical port 0 */ if ((IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; status = ixgbe_get_ets_data(hw, &ets_cfg, &ets_offset); if (status) @@ -3972,7 +3961,7 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw) /* Only support thermal sensors attached to physical port 0 */ if ((IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; status = ixgbe_get_ets_data(hw, &ets_cfg, &ets_offset); if (status) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 0051aa676e19..f8e65e18284e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3339,7 +3339,7 @@ static int ixgbe_get_module_eeprom(struct net_device *dev, { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; - s32 status = IXGBE_ERR_PHY_ADDR_INVALID; + s32 status = -EFAULT; u8 databyte = 0xFF; int i = 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 9e0e13638c46..086cc2573033 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2764,7 +2764,6 @@ static void ixgbe_check_overtemp_subtask(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 eicr = adapter->interrupt_event; - s32 rc; if (test_bit(__IXGBE_DOWN, &adapter->state)) return; @@ -2798,14 +2797,13 @@ static void ixgbe_check_overtemp_subtask(struct ixgbe_adapter *adapter) } /* Check if this is not due to overtemp */ - if (hw->phy.ops.check_overtemp(hw) != IXGBE_ERR_OVERTEMP) + if (!hw->phy.ops.check_overtemp(hw)) return; break; case IXGBE_DEV_ID_X550EM_A_1G_T: case IXGBE_DEV_ID_X550EM_A_1G_T_L: - rc = hw->phy.ops.check_overtemp(hw); - if (rc != IXGBE_ERR_OVERTEMP) + if (!hw->phy.ops.check_overtemp(hw)) return; break; default: @@ -2949,8 +2947,8 @@ static void ixgbe_check_lsc(struct ixgbe_adapter *adapter) static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter, u64 qmask) { - u32 mask; struct ixgbe_hw *hw = &adapter->hw; + u32 mask; switch (hw->mac.type) { case ixgbe_mac_82598EB: @@ -5520,7 +5518,7 @@ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) { u32 speed; bool autoneg, link_up = false; - int ret = IXGBE_ERR_LINK_SETUP; + int ret = -EIO; if (hw->mac.ops.check_link) ret = hw->mac.ops.check_link(hw, &speed, &link_up, false); @@ -5991,13 +5989,13 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) err = hw->mac.ops.init_hw(hw); switch (err) { case 0: - case IXGBE_ERR_SFP_NOT_PRESENT: - case IXGBE_ERR_SFP_NOT_SUPPORTED: + case -ENOENT: + case -EOPNOTSUPP: break; - case IXGBE_ERR_PRIMARY_REQUESTS_PENDING: + case -EALREADY: e_dev_err("primary disable timed out\n"); break; - case IXGBE_ERR_EEPROM_VERSION: + case -EACCES: /* We are running on a pre-production device, log a warning */ e_dev_warn("This device is a pre-production adapter/LOM. " "Please be aware there may be issues associated with " @@ -7837,10 +7835,10 @@ static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter) adapter->sfp_poll_time = jiffies + IXGBE_SFP_POLL_JIFFIES - 1; err = hw->phy.ops.identify_sfp(hw); - if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (err == -EOPNOTSUPP) goto sfp_out; - if (err == IXGBE_ERR_SFP_NOT_PRESENT) { + if (err == -ENOENT) { /* If no cable is present, then we need to reset * the next time we find a good cable. */ adapter->flags2 |= IXGBE_FLAG2_SFP_NEEDS_RESET; @@ -7866,7 +7864,7 @@ static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter) else err = hw->mac.ops.setup_sfp(hw); - if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (err == -EOPNOTSUPP) goto sfp_out; adapter->flags |= IXGBE_FLAG_NEED_LINK_CONFIG; @@ -7875,8 +7873,8 @@ static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter) sfp_out: clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); - if ((err == IXGBE_ERR_SFP_NOT_SUPPORTED) && - (adapter->netdev->reg_state == NETREG_REGISTERED)) { + if (err == -EOPNOTSUPP && + adapter->netdev->reg_state == NETREG_REGISTERED) { e_dev_err("failed to initialize because an unsupported " "SFP+ module type was detected.\n"); e_dev_err("Reload the driver after installing a " @@ -7946,7 +7944,7 @@ static void ixgbe_service_timer(struct timer_list *t) static void ixgbe_phy_interrupt_subtask(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - u32 status; + bool overtemp; if (!(adapter->flags2 & IXGBE_FLAG2_PHY_INTERRUPT)) return; @@ -7956,11 +7954,9 @@ static void ixgbe_phy_interrupt_subtask(struct ixgbe_adapter *adapter) if (!hw->phy.ops.handle_lasi) return; - status = hw->phy.ops.handle_lasi(&adapter->hw); - if (status != IXGBE_ERR_OVERTEMP) - return; - - e_crit(drv, "%s\n", ixgbe_overheat_msg); + hw->phy.ops.handle_lasi(&adapter->hw, &overtemp); + if (overtemp) + e_crit(drv, "%s\n", ixgbe_overheat_msg); } static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter) @@ -10547,6 +10543,44 @@ static void ixgbe_reset_rxr_stats(struct ixgbe_ring *rx_ring) memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats)); } +/** + * ixgbe_irq_disable_single - Disable single IRQ vector + * @adapter: adapter structure + * @ring: ring index + **/ +static void ixgbe_irq_disable_single(struct ixgbe_adapter *adapter, u32 ring) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 qmask = BIT_ULL(ring); + u32 mask; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + mask = qmask & IXGBE_EIMC_RTX_QUEUE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, mask); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: + mask = (qmask & 0xFFFFFFFF); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); + mask = (qmask >> 32); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); + break; + default: + break; + } + IXGBE_WRITE_FLUSH(&adapter->hw); + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + synchronize_irq(adapter->msix_entries[ring].vector); + else + synchronize_irq(adapter->pdev->irq); +} + /** * ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings * @adapter: adapter structure @@ -10563,6 +10597,11 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; + ixgbe_irq_disable_single(adapter, ring); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_disable(&rx_ring->q_vector->napi); + ixgbe_disable_txr(adapter, tx_ring); if (xdp_ring) ixgbe_disable_txr(adapter, xdp_ring); @@ -10571,9 +10610,6 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) if (xdp_ring) synchronize_rcu(); - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_disable(&rx_ring->q_vector->napi); - ixgbe_clean_tx_ring(tx_ring); if (xdp_ring) ixgbe_clean_tx_ring(xdp_ring); @@ -10601,9 +10637,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_enable(&rx_ring->q_vector->napi); - ixgbe_configure_tx_ring(adapter, tx_ring); if (xdp_ring) ixgbe_configure_tx_ring(adapter, xdp_ring); @@ -10612,6 +10645,11 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state); if (xdp_ring) clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_enable(&rx_ring->q_vector->napi); + ixgbe_irq_enable_queues(adapter, BIT_ULL(ring)); + IXGBE_WRITE_FLUSH(&adapter->hw); } /** @@ -10943,9 +10981,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = hw->mac.ops.reset_hw(hw); hw->phy.reset_if_overtemp = false; ixgbe_set_eee_capable(adapter); - if (err == IXGBE_ERR_SFP_NOT_PRESENT) { + if (err == -ENOENT) { err = 0; - } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { + } else if (err == -EOPNOTSUPP) { e_dev_err("failed to load because an unsupported SFP+ or QSFP module type was detected.\n"); e_dev_err("Reload the driver after installing a supported module.\n"); goto err_sw_init; @@ -11161,7 +11199,7 @@ skip_sriov: /* reset the hardware with the new settings */ err = hw->mac.ops.start_hw(hw); - if (err == IXGBE_ERR_EEPROM_VERSION) { + if (err == -EACCES) { /* We are running on a pre-production device, log a warning */ e_dev_warn("This device is a pre-production adapter/LOM. " "Please be aware there may be issues associated " diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c index 5679293e53f7..fe7ef5773369 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c @@ -24,7 +24,7 @@ s32 ixgbe_read_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id) size = mbx->size; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->read(hw, msg, size, mbx_id); } @@ -43,10 +43,10 @@ s32 ixgbe_write_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id) struct ixgbe_mbx_info *mbx = &hw->mbx; if (size > mbx->size) - return IXGBE_ERR_MBX; + return -EINVAL; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->write(hw, msg, size, mbx_id); } @@ -63,7 +63,7 @@ s32 ixgbe_check_for_msg(struct ixgbe_hw *hw, u16 mbx_id) struct ixgbe_mbx_info *mbx = &hw->mbx; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->check_for_msg(hw, mbx_id); } @@ -80,7 +80,7 @@ s32 ixgbe_check_for_ack(struct ixgbe_hw *hw, u16 mbx_id) struct ixgbe_mbx_info *mbx = &hw->mbx; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->check_for_ack(hw, mbx_id); } @@ -97,7 +97,7 @@ s32 ixgbe_check_for_rst(struct ixgbe_hw *hw, u16 mbx_id) struct ixgbe_mbx_info *mbx = &hw->mbx; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->check_for_rst(hw, mbx_id); } @@ -115,12 +115,12 @@ static s32 ixgbe_poll_for_msg(struct ixgbe_hw *hw, u16 mbx_id) int countdown = mbx->timeout; if (!countdown || !mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; while (mbx->ops->check_for_msg(hw, mbx_id)) { countdown--; if (!countdown) - return IXGBE_ERR_MBX; + return -EIO; udelay(mbx->usec_delay); } @@ -140,12 +140,12 @@ static s32 ixgbe_poll_for_ack(struct ixgbe_hw *hw, u16 mbx_id) int countdown = mbx->timeout; if (!countdown || !mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; while (mbx->ops->check_for_ack(hw, mbx_id)) { countdown--; if (!countdown) - return IXGBE_ERR_MBX; + return -EIO; udelay(mbx->usec_delay); } @@ -169,7 +169,7 @@ static s32 ixgbe_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, s32 ret_val; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; ret_val = ixgbe_poll_for_msg(hw, mbx_id); if (ret_val) @@ -197,7 +197,7 @@ static s32 ixgbe_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, /* exit if either we can't write or there isn't a defined timeout */ if (!mbx->ops || !mbx->timeout) - return IXGBE_ERR_MBX; + return -EIO; /* send msg */ ret_val = mbx->ops->write(hw, msg, size, mbx_id); @@ -217,7 +217,7 @@ static s32 ixgbe_check_for_bit_pf(struct ixgbe_hw *hw, u32 mask, s32 index) return 0; } - return IXGBE_ERR_MBX; + return -EIO; } /** @@ -238,7 +238,7 @@ static s32 ixgbe_check_for_msg_pf(struct ixgbe_hw *hw, u16 vf_number) return 0; } - return IXGBE_ERR_MBX; + return -EIO; } /** @@ -259,7 +259,7 @@ static s32 ixgbe_check_for_ack_pf(struct ixgbe_hw *hw, u16 vf_number) return 0; } - return IXGBE_ERR_MBX; + return -EIO; } /** @@ -295,7 +295,7 @@ static s32 ixgbe_check_for_rst_pf(struct ixgbe_hw *hw, u16 vf_number) return 0; } - return IXGBE_ERR_MBX; + return -EIO; } /** @@ -317,7 +317,7 @@ static s32 ixgbe_obtain_mbx_lock_pf(struct ixgbe_hw *hw, u16 vf_number) if (p2v_mailbox & IXGBE_PFMAILBOX_PFU) return 0; - return IXGBE_ERR_MBX; + return -EIO; } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h index 8f4316b19278..6434c190e7a4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h @@ -7,7 +7,6 @@ #include "ixgbe_type.h" #define IXGBE_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */ -#define IXGBE_ERR_MBX -100 #define IXGBE_VFMAILBOX 0x002FC #define IXGBE_VFMBMEM 0x00200 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 123dca9ce468..305afb82388b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -102,7 +102,7 @@ s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, csum = ~csum; do { if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; ixgbe_i2c_start(hw); /* Device Address and write indication */ if (ixgbe_out_i2c_byte_ack(hw, addr)) @@ -150,7 +150,7 @@ fail: hw_dbg(hw, "I2C byte read combined error.\n"); } while (retry < max_retry); - return IXGBE_ERR_I2C; + return -EIO; } /** @@ -179,7 +179,7 @@ s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, csum = ~csum; do { if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; ixgbe_i2c_start(hw); /* Device Address and write indication */ if (ixgbe_out_i2c_byte_ack(hw, addr)) @@ -215,7 +215,7 @@ fail: hw_dbg(hw, "I2C byte write combined error.\n"); } while (retry < max_retry); - return IXGBE_ERR_I2C; + return -EIO; } /** @@ -262,8 +262,8 @@ static bool ixgbe_probe_phy(struct ixgbe_hw *hw, u16 phy_addr) **/ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) { + u32 status = -EFAULT; u32 phy_addr; - u32 status = IXGBE_ERR_PHY_ADDR_INVALID; if (!hw->phy.phy_semaphore_mask) { if (hw->bus.lan_id) @@ -282,7 +282,7 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) if (ixgbe_probe_phy(hw, phy_addr)) return 0; else - return IXGBE_ERR_PHY_ADDR_INVALID; + return -EFAULT; } for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { @@ -408,8 +408,7 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) return status; /* Don't reset PHY if it's shut down due to overtemp. */ - if (!hw->phy.reset_if_overtemp && - (IXGBE_ERR_OVERTEMP == hw->phy.ops.check_overtemp(hw))) + if (!hw->phy.reset_if_overtemp && hw->phy.ops.check_overtemp(hw)) return 0; /* Blocked by MNG FW so bail */ @@ -457,7 +456,7 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) if (ctrl & MDIO_CTRL1_RESET) { hw_dbg(hw, "PHY reset polling failed to complete.\n"); - return IXGBE_ERR_RESET_FAILED; + return -EIO; } return 0; @@ -500,7 +499,7 @@ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { hw_dbg(hw, "PHY address command did not complete.\n"); - return IXGBE_ERR_PHY; + return -EIO; } /* Address cycle complete, setup and write the read @@ -527,7 +526,7 @@ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { hw_dbg(hw, "PHY read command didn't complete\n"); - return IXGBE_ERR_PHY; + return -EIO; } /* Read operation is complete. Get the data @@ -559,7 +558,7 @@ s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, phy_data); hw->mac.ops.release_swfw_sync(hw, gssr); } else { - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } return status; @@ -604,7 +603,7 @@ s32 ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { hw_dbg(hw, "PHY address cmd didn't complete\n"); - return IXGBE_ERR_PHY; + return -EIO; } /* @@ -632,7 +631,7 @@ s32 ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { hw_dbg(hw, "PHY write cmd didn't complete\n"); - return IXGBE_ERR_PHY; + return -EIO; } return 0; @@ -657,7 +656,7 @@ s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, phy_data); hw->mac.ops.release_swfw_sync(hw, gssr); } else { - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } return status; @@ -1303,7 +1302,7 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) if ((phy_data & MDIO_CTRL1_RESET) != 0) { hw_dbg(hw, "PHY reset did not complete.\n"); - return IXGBE_ERR_PHY; + return -EIO; } /* Get init offsets */ @@ -1360,12 +1359,12 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) hw_dbg(hw, "SOL\n"); } else { hw_dbg(hw, "Bad control value\n"); - return IXGBE_ERR_PHY; + return -EIO; } break; default: hw_dbg(hw, "Bad control type\n"); - return IXGBE_ERR_PHY; + return -EIO; } } @@ -1373,7 +1372,7 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) err_eeprom: hw_err(hw, "eeprom read at offset %d failed\n", data_offset); - return IXGBE_ERR_PHY; + return -EIO; } /** @@ -1391,10 +1390,10 @@ s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw) return ixgbe_identify_qsfp_module_generic(hw); default: hw->phy.sfp_type = ixgbe_sfp_type_not_present; - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /** @@ -1419,7 +1418,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber) { hw->phy.sfp_type = ixgbe_sfp_type_not_present; - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /* LAN ID is needed for sfp_type determination */ @@ -1434,7 +1433,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) if (identifier != IXGBE_SFF_IDENTIFIER_SFP) { hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_1GBE_COMP_CODES, @@ -1625,7 +1624,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) { hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } /* Anything else 82598-based is supported */ @@ -1649,7 +1648,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) } hw_dbg(hw, "SFP+ module not supported\n"); hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } return 0; @@ -1659,7 +1658,7 @@ err_read_i2c_eeprom: hw->phy.id = 0; hw->phy.type = ixgbe_phy_unknown; } - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /** @@ -1686,7 +1685,7 @@ static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber_qsfp) { hw->phy.sfp_type = ixgbe_sfp_type_not_present; - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /* LAN ID is needed for sfp_type determination */ @@ -1700,7 +1699,7 @@ static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) if (identifier != IXGBE_SFF_IDENTIFIER_QSFP_PLUS) { hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } hw->phy.id = identifier; @@ -1768,7 +1767,7 @@ static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) } else { /* unsupported module type */ hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } } @@ -1828,7 +1827,7 @@ static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) } hw_dbg(hw, "QSFP module not supported\n"); hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } return 0; } @@ -1839,7 +1838,7 @@ err_read_i2c_eeprom: hw->phy.id = 0; hw->phy.type = ixgbe_phy_unknown; - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /** @@ -1859,14 +1858,14 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, u16 sfp_type = hw->phy.sfp_type; if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; if (hw->phy.sfp_type == ixgbe_sfp_type_not_present) - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; if ((hw->device_id == IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) && (hw->phy.sfp_type == ixgbe_sfp_type_da_cu)) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; /* * Limiting active cables and 1G Phys must be initialized as @@ -1887,11 +1886,11 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, if (hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset)) { hw_err(hw, "eeprom read at %d failed\n", IXGBE_PHY_INIT_OFFSET_NL); - return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT; + return -EIO; } if ((!*list_offset) || (*list_offset == 0xFFFF)) - return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT; + return -EIO; /* Shift offset to first ID word */ (*list_offset)++; @@ -1910,7 +1909,7 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, goto err_phy; if ((!*data_offset) || (*data_offset == 0xFFFF)) { hw_dbg(hw, "SFP+ module not supported\n"); - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } else { break; } @@ -1923,14 +1922,14 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, if (sfp_id == IXGBE_PHY_INIT_END_NL) { hw_dbg(hw, "No matching SFP+ module found\n"); - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } return 0; err_phy: hw_err(hw, "eeprom read at offset %d failed\n", *list_offset); - return IXGBE_ERR_PHY; + return -EIO; } /** @@ -2025,7 +2024,7 @@ static s32 ixgbe_read_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset, do { if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; ixgbe_i2c_start(hw); @@ -2141,7 +2140,7 @@ static s32 ixgbe_write_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset, u32 swfw_mask = hw->phy.phy_semaphore_mask; if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; do { ixgbe_i2c_start(hw); @@ -2383,7 +2382,7 @@ static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw) if (ack == 1) { hw_dbg(hw, "I2C ack was not received.\n"); - status = IXGBE_ERR_I2C; + status = -EIO; } ixgbe_lower_i2c_clk(hw, &i2cctl); @@ -2455,7 +2454,7 @@ static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data) udelay(IXGBE_I2C_T_LOW); } else { hw_dbg(hw, "I2C data was not set to %X\n", data); - return IXGBE_ERR_I2C; + return -EIO; } return 0; @@ -2551,7 +2550,7 @@ static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data) *i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw)); if (data != ixgbe_get_i2c_data(hw, i2cctl)) { hw_dbg(hw, "Error - I2C data was not set to %X.\n", data); - return IXGBE_ERR_I2C; + return -EIO; } return 0; @@ -2621,22 +2620,24 @@ static void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw) * @hw: pointer to hardware structure * * Checks if the LASI temp alarm status was triggered due to overtemp + * + * Return true when an overtemp event detected, otherwise false. **/ -s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw) +bool ixgbe_tn_check_overtemp(struct ixgbe_hw *hw) { u16 phy_data = 0; + u32 status; if (hw->device_id != IXGBE_DEV_ID_82599_T3_LOM) - return 0; + return false; /* Check that the LASI temp alarm status was triggered */ - hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG, - MDIO_MMD_PMAPMD, &phy_data); + status = hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG, + MDIO_MMD_PMAPMD, &phy_data); + if (status) + return false; - if (!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM)) - return 0; - - return IXGBE_ERR_OVERTEMP; + return !!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM); } /** ixgbe_set_copper_phy_power - Control power for copper phy diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index 6544c4539c0d..ef72729d7c93 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -155,7 +155,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw); s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, u16 *list_offset, u16 *data_offset); -s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw); +bool ixgbe_tn_check_overtemp(struct ixgbe_hw *hw); s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, u8 *data); s32 ixgbe_read_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index ea88ac04ab9a..198ab9d97618 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1329,7 +1329,7 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) break; default: e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]); - retval = IXGBE_ERR_MBX; + retval = -EIO; break; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 2b00db92b08f..61b9774b3d31 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -3509,10 +3509,10 @@ struct ixgbe_phy_operations { s32 (*read_i2c_sff8472)(struct ixgbe_hw *, u8 , u8 *); s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *); s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8); - s32 (*check_overtemp)(struct ixgbe_hw *); + bool (*check_overtemp)(struct ixgbe_hw *); s32 (*set_phy_power)(struct ixgbe_hw *, bool on); s32 (*enter_lplu)(struct ixgbe_hw *); - s32 (*handle_lasi)(struct ixgbe_hw *hw); + s32 (*handle_lasi)(struct ixgbe_hw *hw, bool *); s32 (*read_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr, u8 *value); s32 (*write_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr, @@ -3665,45 +3665,6 @@ struct ixgbe_info { const u32 *mvals; }; - -/* Error Codes */ -#define IXGBE_ERR_EEPROM -1 -#define IXGBE_ERR_EEPROM_CHECKSUM -2 -#define IXGBE_ERR_PHY -3 -#define IXGBE_ERR_CONFIG -4 -#define IXGBE_ERR_PARAM -5 -#define IXGBE_ERR_MAC_TYPE -6 -#define IXGBE_ERR_UNKNOWN_PHY -7 -#define IXGBE_ERR_LINK_SETUP -8 -#define IXGBE_ERR_ADAPTER_STOPPED -9 -#define IXGBE_ERR_INVALID_MAC_ADDR -10 -#define IXGBE_ERR_DEVICE_NOT_SUPPORTED -11 -#define IXGBE_ERR_PRIMARY_REQUESTS_PENDING -12 -#define IXGBE_ERR_INVALID_LINK_SETTINGS -13 -#define IXGBE_ERR_AUTONEG_NOT_COMPLETE -14 -#define IXGBE_ERR_RESET_FAILED -15 -#define IXGBE_ERR_SWFW_SYNC -16 -#define IXGBE_ERR_PHY_ADDR_INVALID -17 -#define IXGBE_ERR_I2C -18 -#define IXGBE_ERR_SFP_NOT_SUPPORTED -19 -#define IXGBE_ERR_SFP_NOT_PRESENT -20 -#define IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT -21 -#define IXGBE_ERR_NO_SAN_ADDR_PTR -22 -#define IXGBE_ERR_FDIR_REINIT_FAILED -23 -#define IXGBE_ERR_EEPROM_VERSION -24 -#define IXGBE_ERR_NO_SPACE -25 -#define IXGBE_ERR_OVERTEMP -26 -#define IXGBE_ERR_FC_NOT_NEGOTIATED -27 -#define IXGBE_ERR_FC_NOT_SUPPORTED -28 -#define IXGBE_ERR_SFP_SETUP_NOT_COMPLETE -30 -#define IXGBE_ERR_PBA_SECTION -31 -#define IXGBE_ERR_INVALID_ARGUMENT -32 -#define IXGBE_ERR_HOST_INTERFACE_COMMAND -33 -#define IXGBE_ERR_FDIR_CMD_INCOMPLETE -38 -#define IXGBE_ERR_FW_RESP_INVALID -39 -#define IXGBE_ERR_TOKEN_RETRY -40 -#define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF - #define IXGBE_FUSES0_GROUP(_i) (0x11158 + ((_i) * 4)) #define IXGBE_FUSES0_300MHZ BIT(5) #define IXGBE_FUSES0_REV_MASK (3u << 6) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index d5cfb51ff648..15325c549d9b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -84,7 +84,7 @@ mac_reset_top: status = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); if (status) { hw_dbg(hw, "semaphore failed with %d", status); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } ctrl = IXGBE_CTRL_RST; @@ -103,7 +103,7 @@ mac_reset_top: } if (ctrl & IXGBE_CTRL_RST_MASK) { - status = IXGBE_ERR_RESET_FAILED; + status = -EIO; hw_dbg(hw, "Reset polling failed to complete.\n"); } msleep(100); @@ -220,7 +220,7 @@ static s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data) s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_read_eerd_generic(hw, offset, data); @@ -243,7 +243,7 @@ static s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw, s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_read_eerd_buffer_generic(hw, offset, words, data); @@ -264,7 +264,7 @@ static s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data) s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_write_eewr_generic(hw, offset, data); @@ -287,7 +287,7 @@ static s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw, s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_write_eewr_buffer_generic(hw, offset, words, data); @@ -324,7 +324,7 @@ static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) for (i = 0; i < checksum_last_word; i++) { if (ixgbe_read_eerd_generic(hw, i, &word)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } checksum += word; } @@ -349,7 +349,7 @@ static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) if (ixgbe_read_eerd_generic(hw, pointer, &length)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } /* Skip pointer section if length is invalid. */ @@ -360,7 +360,7 @@ static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) for (j = pointer + 1; j <= pointer + length; j++) { if (ixgbe_read_eerd_generic(hw, j, &word)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } checksum += word; } @@ -397,7 +397,7 @@ static s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, } if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = hw->eeprom.ops.calc_checksum(hw); if (status < 0) @@ -418,7 +418,7 @@ static s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, */ if (read_checksum != checksum) { hw_dbg(hw, "Invalid EEPROM checksum"); - status = IXGBE_ERR_EEPROM_CHECKSUM; + status = -EIO; } /* If the user cares, return the calculated checksum */ @@ -455,7 +455,7 @@ static s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw) } if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = hw->eeprom.ops.calc_checksum(hw); if (status < 0) @@ -490,7 +490,7 @@ static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw) s32 status; status = ixgbe_poll_flash_update_done_X540(hw); - if (status == IXGBE_ERR_EEPROM) { + if (status == -EIO) { hw_dbg(hw, "Flash update time out\n"); return status; } @@ -540,7 +540,7 @@ static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw) return 0; udelay(5); } - return IXGBE_ERR_EEPROM; + return -EIO; } /** @@ -575,7 +575,7 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) * SW_FW_SYNC bits (not just NVM) */ if (ixgbe_get_swfw_sync_semaphore(hw)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC(hw)); if (!(swfw_sync & (fwmask | swmask | hwmask))) { @@ -599,7 +599,7 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) * bits in the SW_FW_SYNC register. */ if (ixgbe_get_swfw_sync_semaphore(hw)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC(hw)); if (swfw_sync & (fwmask | hwmask)) { swfw_sync |= swmask; @@ -622,11 +622,11 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) rmask |= IXGBE_GSSR_I2C_MASK; ixgbe_release_swfw_sync_X540(hw, rmask); ixgbe_release_swfw_sync_semaphore(hw); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } ixgbe_release_swfw_sync_semaphore(hw); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } /** @@ -680,7 +680,7 @@ static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw) if (i == timeout) { hw_dbg(hw, "Software semaphore SMBI between device drivers not granted.\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } /* Now get the semaphore between SW/FW through the REGSMP bit */ @@ -697,7 +697,7 @@ static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw) */ hw_dbg(hw, "REGSMP Software NVM semaphore not granted\n"); ixgbe_release_swfw_sync_semaphore(hw); - return IXGBE_ERR_EEPROM; + return -EIO; } /** @@ -768,7 +768,7 @@ s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index) bool link_up; if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* Link should be up in order for the blink bit in the LED control * register to work. Force link and speed in the MAC if link is down. @@ -804,7 +804,7 @@ s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index) u32 ledctl_reg; if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* Restore the LED to its default value. */ ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index aa4bf6c9a2f7..cdc912bba808 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -206,13 +206,13 @@ static s32 ixgbe_reset_cs4227(struct ixgbe_hw *hw) } if (retry == IXGBE_CS4227_RETRIES) { hw_err(hw, "CS4227 reset did not complete\n"); - return IXGBE_ERR_PHY; + return -EIO; } status = ixgbe_read_cs4227(hw, IXGBE_CS4227_EEPROM_STATUS, &value); if (status || !(value & IXGBE_CS4227_EEPROM_LOAD_OK)) { hw_err(hw, "CS4227 EEPROM did not load successfully\n"); - return IXGBE_ERR_PHY; + return -EIO; } return 0; @@ -350,13 +350,13 @@ static s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw) static s32 ixgbe_read_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u16 *phy_data) { - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; } static s32 ixgbe_write_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u16 phy_data) { - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; } /** @@ -463,7 +463,7 @@ s32 ixgbe_fw_phy_activity(struct ixgbe_hw *hw, u16 activity, --retries; } while (retries > 0); - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EIO; } static const struct { @@ -511,7 +511,7 @@ static s32 ixgbe_get_phy_id_fw(struct ixgbe_hw *hw) hw->phy.id |= phy_id_lo & IXGBE_PHY_REVISION_MASK; hw->phy.revision = phy_id_lo & ~IXGBE_PHY_REVISION_MASK; if (!hw->phy.id || hw->phy.id == IXGBE_PHY_REVISION_MASK) - return IXGBE_ERR_PHY_ADDR_INVALID; + return -EFAULT; hw->phy.autoneg_advertised = hw->phy.speeds_supported; hw->phy.eee_speeds_supported = IXGBE_LINK_SPEED_100_FULL | @@ -568,7 +568,7 @@ static s32 ixgbe_setup_fw_link(struct ixgbe_hw *hw) if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { hw_err(hw, "rx_pause not valid in strict IEEE mode\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } switch (hw->fc.requested_mode) { @@ -600,8 +600,10 @@ static s32 ixgbe_setup_fw_link(struct ixgbe_hw *hw) rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_SETUP_LINK, &setup); if (rc) return rc; + if (setup[0] == FW_PHY_ACT_SETUP_LINK_RSP_DOWN) - return IXGBE_ERR_OVERTEMP; + return -EIO; + return 0; } @@ -675,7 +677,7 @@ static s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl) *ctrl = command; if (i == IXGBE_MDIO_COMMAND_TIMEOUT) { hw_dbg(hw, "IOSF wait timed out\n"); - return IXGBE_ERR_PHY; + return -EIO; } return 0; @@ -715,7 +717,8 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT; hw_dbg(hw, "Failed to read, error %x\n", error); - return IXGBE_ERR_PHY; + ret = -EIO; + goto out; } if (!ret) @@ -750,9 +753,9 @@ static s32 ixgbe_get_phy_token(struct ixgbe_hw *hw) if (token_cmd.hdr.cmd_or_resp.ret_status == FW_PHY_TOKEN_OK) return 0; if (token_cmd.hdr.cmd_or_resp.ret_status != FW_PHY_TOKEN_RETRY) - return IXGBE_ERR_FW_RESP_INVALID; + return -EIO; - return IXGBE_ERR_TOKEN_RETRY; + return -EAGAIN; } /** @@ -778,7 +781,7 @@ static s32 ixgbe_put_phy_token(struct ixgbe_hw *hw) return status; if (token_cmd.hdr.cmd_or_resp.ret_status == FW_PHY_TOKEN_OK) return 0; - return IXGBE_ERR_FW_RESP_INVALID; + return -EIO; } /** @@ -942,7 +945,7 @@ static s32 ixgbe_checksum_ptr_x550(struct ixgbe_hw *hw, u16 ptr, local_buffer = buf; } else { if (buffer_size < ptr) - return IXGBE_ERR_PARAM; + return -EINVAL; local_buffer = &buffer[ptr]; } @@ -960,7 +963,7 @@ static s32 ixgbe_checksum_ptr_x550(struct ixgbe_hw *hw, u16 ptr, } if (buffer && ((u32)start + (u32)length > buffer_size)) - return IXGBE_ERR_PARAM; + return -EINVAL; for (i = start; length; i++, length--) { if (i == bufsz && !buffer) { @@ -1012,7 +1015,7 @@ static s32 ixgbe_calc_checksum_X550(struct ixgbe_hw *hw, u16 *buffer, local_buffer = eeprom_ptrs; } else { if (buffer_size < IXGBE_EEPROM_LAST_WORD) - return IXGBE_ERR_PARAM; + return -EINVAL; local_buffer = buffer; } @@ -1148,7 +1151,7 @@ static s32 ixgbe_validate_eeprom_checksum_X550(struct ixgbe_hw *hw, * calculated checksum */ if (read_checksum != checksum) { - status = IXGBE_ERR_EEPROM_CHECKSUM; + status = -EIO; hw_dbg(hw, "Invalid EEPROM checksum"); } @@ -1203,7 +1206,7 @@ static s32 ixgbe_write_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 data) hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); } else { hw_dbg(hw, "write ee hostif failed to get semaphore"); - status = IXGBE_ERR_SWFW_SYNC; + status = -EBUSY; } return status; @@ -1415,7 +1418,7 @@ static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT; hw_dbg(hw, "Failed to write, error %x\n", error); - return IXGBE_ERR_PHY; + return -EIO; } out: @@ -1558,7 +1561,7 @@ static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) /* iXFI is only supported with X552 */ if (mac->type != ixgbe_mac_X550EM_x) - return IXGBE_ERR_LINK_SETUP; + return -EIO; /* Disable AN and force speed to 10G Serial. */ status = ixgbe_read_iosf_sb_reg_x550(hw, @@ -1580,7 +1583,7 @@ static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) break; default: /* Other link speeds are not supported by internal KR PHY. */ - return IXGBE_ERR_LINK_SETUP; + return -EINVAL; } status = ixgbe_write_iosf_sb_reg_x550(hw, @@ -1611,7 +1614,7 @@ static s32 ixgbe_supported_sfp_modules_X550em(struct ixgbe_hw *hw, bool *linear) { switch (hw->phy.sfp_type) { case ixgbe_sfp_type_not_present: - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; case ixgbe_sfp_type_da_cu_core0: case ixgbe_sfp_type_da_cu_core1: *linear = true; @@ -1630,7 +1633,7 @@ static s32 ixgbe_supported_sfp_modules_X550em(struct ixgbe_hw *hw, bool *linear) case ixgbe_sfp_type_1g_cu_core0: case ixgbe_sfp_type_1g_cu_core1: default: - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } return 0; @@ -1660,7 +1663,7 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, * there is no reason to configure CS4227 and SFP not present error is * not accepted in the setup MAC link flow. */ - if (status == IXGBE_ERR_SFP_NOT_PRESENT) + if (status == -ENOENT) return 0; if (status) @@ -1718,7 +1721,7 @@ static s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed) break; default: /* Other link speeds are not supported by internal PHY. */ - return IXGBE_ERR_LINK_SETUP; + return -EINVAL; } (void)mac->ops.write_iosf_sb_reg(hw, @@ -1803,7 +1806,7 @@ ixgbe_setup_mac_link_sfp_n(struct ixgbe_hw *hw, ixgbe_link_speed speed, /* If no SFP module present, then return success. Return success since * SFP not present error is not excepted in the setup MAC link flow. */ - if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT) + if (ret_val == -ENOENT) return 0; if (ret_val) @@ -1853,7 +1856,7 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, /* If no SFP module present, then return success. Return success since * SFP not present error is not excepted in the setup MAC link flow. */ - if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT) + if (ret_val == -ENOENT) return 0; if (ret_val) @@ -1863,7 +1866,7 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, ixgbe_setup_kr_speed_x550em(hw, speed); if (hw->phy.mdio.prtad == MDIO_PRTAD_NONE) - return IXGBE_ERR_PHY_ADDR_INVALID; + return -EFAULT; /* Get external PHY SKU id */ ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_EFUSE_PDF_SKU, @@ -1962,7 +1965,7 @@ static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, u16 i, autoneg_status; if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) - return IXGBE_ERR_CONFIG; + return -EIO; status = ixgbe_check_mac_link_generic(hw, speed, link_up, link_up_wait_to_complete); @@ -2145,9 +2148,9 @@ static s32 ixgbe_setup_sgmii_fw(struct ixgbe_hw *hw, ixgbe_link_speed speed, */ static void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw) { - s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED; u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 }; ixgbe_link_speed speed; + s32 status = -EIO; bool link_up; /* AN should have completed when the cable was plugged in. @@ -2165,7 +2168,7 @@ static void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw) /* Check if auto-negotiation has completed */ status = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &info); if (status || !(info[0] & FW_PHY_ACT_GET_LINK_INFO_AN_COMPLETE)) { - status = IXGBE_ERR_FC_NOT_NEGOTIATED; + status = -EIO; goto out; } @@ -2369,18 +2372,18 @@ static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, * @hw: pointer to hardware structure * @lsc: pointer to boolean flag which indicates whether external Base T * PHY interrupt is lsc + * @is_overtemp: indicate whether an overtemp event encountered * * Determime if external Base T PHY interrupt cause is high temperature * failure alarm or link status change. - * - * Return IXGBE_ERR_OVERTEMP if interrupt is high temperature - * failure alarm, else return PHY access status. **/ -static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) +static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc, + bool *is_overtemp) { u32 status; u16 reg; + *is_overtemp = false; *lsc = false; /* Vendor alarm triggered */ @@ -2412,7 +2415,8 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) if (reg & IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL) { /* power down the PHY in case the PHY FW didn't already */ ixgbe_set_copper_phy_power(hw, false); - return IXGBE_ERR_OVERTEMP; + *is_overtemp = true; + return -EIO; } if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) { /* device fault alarm triggered */ @@ -2426,7 +2430,8 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) if (reg == IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP) { /* power down the PHY in case the PHY FW didn't */ ixgbe_set_copper_phy_power(hw, false); - return IXGBE_ERR_OVERTEMP; + *is_overtemp = true; + return -EIO; } } @@ -2462,12 +2467,12 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) **/ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) { + bool lsc, overtemp; u32 status; u16 reg; - bool lsc; /* Clear interrupt flags */ - status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc); + status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc, &overtemp); /* Enable link status change alarm */ @@ -2546,21 +2551,20 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) /** * ixgbe_handle_lasi_ext_t_x550em - Handle external Base T PHY interrupt * @hw: pointer to hardware structure + * @is_overtemp: indicate whether an overtemp event encountered * * Handle external Base T PHY interrupt. If high temperature * failure alarm then return error, else if link status change * then setup internal/external PHY link - * - * Return IXGBE_ERR_OVERTEMP if interrupt is high temperature - * failure alarm, else return PHY access status. **/ -static s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw) +static s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw, + bool *is_overtemp) { struct ixgbe_phy_info *phy = &hw->phy; bool lsc; u32 status; - status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc); + status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc, is_overtemp); if (status) return status; @@ -2692,7 +2696,7 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) u16 speed; if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) - return IXGBE_ERR_CONFIG; + return -EIO; if (!(hw->mac.type == ixgbe_mac_X550EM_x && !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE))) { @@ -2735,7 +2739,7 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) break; default: /* Internal PHY does not support anything else */ - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } return ixgbe_setup_ixfi_x550em(hw, &force_speed); @@ -2767,7 +2771,7 @@ static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) u16 phy_data; if (led_idx >= IXGBE_X557_MAX_LED_INDEX) - return IXGBE_ERR_PARAM; + return -EINVAL; /* To turn on the LED, set mode to ON. */ hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, @@ -2789,7 +2793,7 @@ static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) u16 phy_data; if (led_idx >= IXGBE_X557_MAX_LED_INDEX) - return IXGBE_ERR_PARAM; + return -EINVAL; /* To turn on the LED, set mode to ON. */ hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, @@ -2813,8 +2817,9 @@ static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) * * Sends driver version number to firmware through the manageability * block. On success return 0 - * else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring - * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + * else returns -EBUSY when encountering an error acquiring + * semaphore, -EIO when command fails or -ENIVAL when incorrect + * params passed. **/ static s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, u8 sub, u16 len, @@ -2825,7 +2830,7 @@ static s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, int i; if (!len || !driver_ver || (len > sizeof(fw_cmd.driver_string))) - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO; fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN + len; @@ -2850,7 +2855,7 @@ static s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, if (fw_cmd.hdr.cmd_or_resp.ret_status != FW_CEM_RESP_STATUS_SUCCESS) - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EIO; return 0; } @@ -2907,7 +2912,7 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw) /* Validate the requested mode */ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { hw_err(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } /* 10gig parts do not have a word in the EEPROM to determine the @@ -2942,7 +2947,7 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw) break; default: hw_err(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } switch (hw->device_id) { @@ -2986,8 +2991,8 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw) static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw) { u32 link_s1, lp_an_page_low, an_cntl_1; - s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED; ixgbe_link_speed speed; + s32 status = -EIO; bool link_up; /* AN should have completed when the cable was plugged in. @@ -3013,7 +3018,7 @@ static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw) if (status || (link_s1 & IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE) == 0) { hw_dbg(hw, "Auto-Negotiation did not complete\n"); - status = IXGBE_ERR_FC_NOT_NEGOTIATED; + status = -EIO; goto out; } @@ -3187,21 +3192,23 @@ static s32 ixgbe_reset_phy_fw(struct ixgbe_hw *hw) /** * ixgbe_check_overtemp_fw - Check firmware-controlled PHYs for overtemp * @hw: pointer to hardware structure + * + * Return true when an overtemp event detected, otherwise false. */ -static s32 ixgbe_check_overtemp_fw(struct ixgbe_hw *hw) +static bool ixgbe_check_overtemp_fw(struct ixgbe_hw *hw) { u32 store[FW_PHY_ACT_DATA_COUNT] = { 0 }; s32 rc; rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &store); if (rc) - return rc; + return false; if (store[0] & FW_PHY_ACT_GET_LINK_INFO_TEMP) { ixgbe_shutdown_fw_phy(hw); - return IXGBE_ERR_OVERTEMP; + return true; } - return 0; + return false; } /** @@ -3251,8 +3258,7 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) /* Identify the PHY or SFP module */ ret_val = phy->ops.identify(hw); - if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED || - ret_val == IXGBE_ERR_PHY_ADDR_INVALID) + if (ret_val == -EOPNOTSUPP || ret_val == -EFAULT) return ret_val; /* Setup function pointers based on detected hardware */ @@ -3460,8 +3466,7 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) /* PHY ops must be identified and initialized prior to reset */ status = hw->phy.ops.init(hw); - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED || - status == IXGBE_ERR_PHY_ADDR_INVALID) + if (status == -EOPNOTSUPP || status == -EFAULT) return status; /* start the external PHY */ @@ -3477,7 +3482,7 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) hw->phy.sfp_setup_needed = false; } - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (status == -EOPNOTSUPP) return status; /* Reset PHY */ @@ -3501,7 +3506,7 @@ mac_reset_top: status = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); if (status) { hw_dbg(hw, "semaphore failed with %d", status); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); @@ -3519,7 +3524,7 @@ mac_reset_top: } if (ctrl & IXGBE_CTRL_RST_MASK) { - status = IXGBE_ERR_RESET_FAILED; + status = -EIO; hw_dbg(hw, "Reset polling failed to complete.\n"); } @@ -3615,7 +3620,7 @@ static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw) /* Validate the requested mode */ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { hw_err(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } if (hw->fc.requested_mode == ixgbe_fc_default) @@ -3672,7 +3677,7 @@ static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw) break; default: hw_err(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } status = hw->mac.ops.write_iosf_sb_reg(hw, @@ -3768,7 +3773,7 @@ static s32 ixgbe_acquire_swfw_sync_x550em_a(struct ixgbe_hw *hw, u32 mask) return 0; if (hmask) ixgbe_release_swfw_sync_X540(hw, hmask); - if (status != IXGBE_ERR_TOKEN_RETRY) + if (status != -EAGAIN) return status; msleep(FW_PHY_TOKEN_DELAY); } @@ -3812,7 +3817,7 @@ static s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = hw->phy.ops.read_reg_mdi(hw, reg_addr, device_type, phy_data); @@ -3838,7 +3843,7 @@ static s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_write_phy_reg_mdi(hw, reg_addr, device_type, phy_data); hw->mac.ops.release_swfw_sync(hw, mask); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index f936640cca4e..2f80ee84c7ec 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -614,12 +614,38 @@ static void mvpp23_bm_set_8pool_mode(struct mvpp2 *priv) mvpp2_write(priv, MVPP22_BM_POOL_BASE_ADDR_HIGH_REG, val); } +/* Cleanup pool before actual initialization in the OS */ +static void mvpp2_bm_pool_cleanup(struct mvpp2 *priv, int pool_id) +{ + unsigned int thread = mvpp2_cpu_to_thread(priv, get_cpu()); + u32 val; + int i; + + /* Drain the BM from all possible residues left by firmware */ + for (i = 0; i < MVPP2_BM_POOL_SIZE_MAX; i++) + mvpp2_thread_read(priv, thread, MVPP2_BM_PHY_ALLOC_REG(pool_id)); + + put_cpu(); + + /* Stop the BM pool */ + val = mvpp2_read(priv, MVPP2_BM_POOL_CTRL_REG(pool_id)); + val |= MVPP2_BM_STOP_MASK; + mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(pool_id), val); +} + static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv) { enum dma_data_direction dma_dir = DMA_FROM_DEVICE; int i, err, poolnum = MVPP2_BM_POOLS_NUM; struct mvpp2_port *port; + if (priv->percpu_pools) + poolnum = mvpp2_get_nrxqs(priv) * 2; + + /* Clean up the pool state in case it contains stale state */ + for (i = 0; i < poolnum; i++) + mvpp2_bm_pool_cleanup(priv, i); + if (priv->percpu_pools) { for (i = 0; i < priv->port_count; i++) { port = priv->port_list[i]; @@ -629,7 +655,6 @@ static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv) } } - poolnum = mvpp2_get_nrxqs(priv) * 2; for (i = 0; i < poolnum; i++) { /* the pool in use */ int pn = i / (poolnum / 2); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 90be87dc105d..e6fe599f7bf3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1346,7 +1346,7 @@ static irqreturn_t cgx_fwi_event_handler(int irq, void *data) /* Release thread waiting for completion */ lmac->cmd_pend = false; - wake_up_interruptible(&lmac->wq_cmd_cmplt); + wake_up(&lmac->wq_cmd_cmplt); break; case CGX_EVT_ASYNC: if (cgx_event_is_linkevent(event)) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index 9690ac01f02c..7d741e3ba8c5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -214,11 +214,12 @@ int otx2_mbox_busy_poll_for_rsp(struct otx2_mbox *mbox, int devid) } EXPORT_SYMBOL(otx2_mbox_busy_poll_for_rsp); -void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid) +static void otx2_mbox_msg_send_data(struct otx2_mbox *mbox, int devid, u64 data) { struct otx2_mbox_dev *mdev = &mbox->dev[devid]; struct mbox_hdr *tx_hdr, *rx_hdr; void *hw_mbase = mdev->hwbase; + u64 intr_val; tx_hdr = hw_mbase + mbox->tx_start; rx_hdr = hw_mbase + mbox->rx_start; @@ -254,14 +255,52 @@ void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid) spin_unlock(&mdev->mbox_lock); + /* Check if interrupt pending */ + intr_val = readq((void __iomem *)mbox->reg_base + + (mbox->trigger | (devid << mbox->tr_shift))); + + intr_val |= data; /* The interrupt should be fired after num_msgs is written * to the shared memory */ - writeq(1, (void __iomem *)mbox->reg_base + + writeq(intr_val, (void __iomem *)mbox->reg_base + (mbox->trigger | (devid << mbox->tr_shift))); } + +void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid) +{ + otx2_mbox_msg_send_data(mbox, devid, MBOX_DOWN_MSG); +} EXPORT_SYMBOL(otx2_mbox_msg_send); +void otx2_mbox_msg_send_up(struct otx2_mbox *mbox, int devid) +{ + otx2_mbox_msg_send_data(mbox, devid, MBOX_UP_MSG); +} +EXPORT_SYMBOL(otx2_mbox_msg_send_up); + +bool otx2_mbox_wait_for_zero(struct otx2_mbox *mbox, int devid) +{ + u64 data; + + data = readq((void __iomem *)mbox->reg_base + + (mbox->trigger | (devid << mbox->tr_shift))); + + /* If data is non-zero wait for ~1ms and return to caller + * whether data has changed to zero or not after the wait. + */ + if (!data) + return true; + + usleep_range(950, 1000); + + data = readq((void __iomem *)mbox->reg_base + + (mbox->trigger | (devid << mbox->tr_shift))); + + return data == 0; +} +EXPORT_SYMBOL(otx2_mbox_wait_for_zero); + struct mbox_msghdr *otx2_mbox_alloc_msg_rsp(struct otx2_mbox *mbox, int devid, int size, int size_rsp) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 03ebabd61635..be70269e9168 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -16,6 +16,9 @@ #define MBOX_SIZE SZ_64K +#define MBOX_DOWN_MSG 1 +#define MBOX_UP_MSG 2 + /* AF/PF: PF initiated, PF/VF VF initiated */ #define MBOX_DOWN_RX_START 0 #define MBOX_DOWN_RX_SIZE (46 * SZ_1K) @@ -101,6 +104,7 @@ int otx2_mbox_regions_init(struct otx2_mbox *mbox, void __force **hwbase, struct pci_dev *pdev, void __force *reg_base, int direction, int ndevs, unsigned long *bmap); void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid); +void otx2_mbox_msg_send_up(struct otx2_mbox *mbox, int devid); int otx2_mbox_wait_for_rsp(struct otx2_mbox *mbox, int devid); int otx2_mbox_busy_poll_for_rsp(struct otx2_mbox *mbox, int devid); struct mbox_msghdr *otx2_mbox_alloc_msg_rsp(struct otx2_mbox *mbox, int devid, @@ -118,6 +122,8 @@ static inline struct mbox_msghdr *otx2_mbox_alloc_msg(struct otx2_mbox *mbox, return otx2_mbox_alloc_msg_rsp(mbox, devid, size, 0); } +bool otx2_mbox_wait_for_zero(struct otx2_mbox *mbox, int devid); + /* Mailbox message types */ #define MBOX_MSG_MASK 0xFFFF #define MBOX_MSG_INVALID 0xFFFE @@ -196,6 +202,9 @@ M(CPT_STATS, 0xA05, cpt_sts, cpt_sts_req, cpt_sts_rsp) \ M(CPT_RXC_TIME_CFG, 0xA06, cpt_rxc_time_cfg, cpt_rxc_time_cfg_req, \ msg_rsp) \ M(CPT_CTX_CACHE_SYNC, 0xA07, cpt_ctx_cache_sync, msg_req, msg_rsp) \ +M(CPT_LF_RESET, 0xA08, cpt_lf_reset, cpt_lf_rst_req, msg_rsp) \ +M(CPT_FLT_ENG_INFO, 0xA09, cpt_flt_eng_info, cpt_flt_eng_info_req, \ + cpt_flt_eng_info_rsp) \ /* SDP mbox IDs (range 0x1000 - 0x11FF) */ \ M(SET_SDP_CHAN_INFO, 0x1000, set_sdp_chan_info, sdp_chan_info_msg, msg_rsp) \ M(GET_SDP_CHAN_INFO, 0x1001, get_sdp_chan_info, msg_req, sdp_get_chan_info_msg) \ @@ -1702,6 +1711,28 @@ struct cpt_inst_lmtst_req { u64 rsvd; }; +/* Mailbox message format to request for CPT LF reset */ +struct cpt_lf_rst_req { + struct mbox_msghdr hdr; + u32 slot; + u32 rsvd; +}; + +/* Mailbox message format to request for CPT faulted engines */ +struct cpt_flt_eng_info_req { + struct mbox_msghdr hdr; + int blkaddr; + bool reset; + u32 rsvd; +}; + +struct cpt_flt_eng_info_rsp { + struct mbox_msghdr hdr; + u64 flt_eng_map[CPT_10K_AF_INT_VEC_RVU]; + u64 rcvrd_eng_map[CPT_10K_AF_INT_VEC_RVU]; + u64 rsvd; +}; + struct sdp_node_info { /* Node to which this PF belons to */ u8 node_id; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c index dfd23580e3b8..d39d86e694cc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c @@ -121,13 +121,17 @@ int mcs_add_intr_wq_entry(struct mcs *mcs, struct mcs_intr_event *event) static int mcs_notify_pfvf(struct mcs_intr_event *event, struct rvu *rvu) { struct mcs_intr_info *req; - int err, pf; + int pf; pf = rvu_get_pf(event->pcifunc); + mutex_lock(&rvu->mbox_lock); + req = otx2_mbox_alloc_msg_mcs_intr_notify(rvu, pf); - if (!req) + if (!req) { + mutex_unlock(&rvu->mbox_lock); return -ENOMEM; + } req->mcs_id = event->mcs_id; req->intr_mask = event->intr_mask; @@ -135,10 +139,11 @@ static int mcs_notify_pfvf(struct mcs_intr_event *event, struct rvu *rvu) req->hdr.pcifunc = event->pcifunc; req->lmac_id = event->lmac_id; - otx2_mbox_msg_send(&rvu->afpf_wq_info.mbox_up, pf); - err = otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pf); - if (err) - dev_warn(rvu->dev, "MCS notification to pf %d failed\n", pf); + otx2_mbox_wait_for_zero(&rvu->afpf_wq_info.mbox_up, pf); + + otx2_mbox_msg_send_up(&rvu->afpf_wq_info.mbox_up, pf); + + mutex_unlock(&rvu->mbox_lock); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index d88d86bf07b0..a7034b47ed6c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1164,8 +1164,16 @@ cpt: goto nix_err; } + err = rvu_cpt_init(rvu); + if (err) { + dev_err(rvu->dev, "%s: Failed to initialize cpt\n", __func__); + goto mcs_err; + } + return 0; +mcs_err: + rvu_mcs_exit(rvu); nix_err: rvu_nix_freemem(rvu); npa_err: @@ -2106,7 +2114,7 @@ bad_message: } } -static void __rvu_mbox_handler(struct rvu_work *mwork, int type) +static void __rvu_mbox_handler(struct rvu_work *mwork, int type, bool poll) { struct rvu *rvu = mwork->rvu; int offset, err, id, devid; @@ -2173,6 +2181,9 @@ static void __rvu_mbox_handler(struct rvu_work *mwork, int type) } mw->mbox_wrk[devid].num_msgs = 0; + if (poll) + otx2_mbox_wait_for_zero(mbox, devid); + /* Send mbox responses to VF/PF */ otx2_mbox_msg_send(mbox, devid); } @@ -2180,15 +2191,18 @@ static void __rvu_mbox_handler(struct rvu_work *mwork, int type) static inline void rvu_afpf_mbox_handler(struct work_struct *work) { struct rvu_work *mwork = container_of(work, struct rvu_work, work); + struct rvu *rvu = mwork->rvu; - __rvu_mbox_handler(mwork, TYPE_AFPF); + mutex_lock(&rvu->mbox_lock); + __rvu_mbox_handler(mwork, TYPE_AFPF, true); + mutex_unlock(&rvu->mbox_lock); } static inline void rvu_afvf_mbox_handler(struct work_struct *work) { struct rvu_work *mwork = container_of(work, struct rvu_work, work); - __rvu_mbox_handler(mwork, TYPE_AFVF); + __rvu_mbox_handler(mwork, TYPE_AFVF, false); } static void __rvu_mbox_up_handler(struct rvu_work *mwork, int type) @@ -2363,6 +2377,8 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, } } + mutex_init(&rvu->mbox_lock); + mbox_regions = kcalloc(num, sizeof(void *), GFP_KERNEL); if (!mbox_regions) { err = -ENOMEM; @@ -2512,10 +2528,9 @@ static void rvu_queue_work(struct mbox_wq_info *mw, int first, } } -static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) +static irqreturn_t rvu_mbox_pf_intr_handler(int irq, void *rvu_irq) { struct rvu *rvu = (struct rvu *)rvu_irq; - int vfs = rvu->vfs; u64 intr; intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT); @@ -2529,6 +2544,18 @@ static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) rvu_queue_work(&rvu->afpf_wq_info, 0, rvu->hw->total_pfs, intr); + return IRQ_HANDLED; +} + +static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) +{ + struct rvu *rvu = (struct rvu *)rvu_irq; + int vfs = rvu->vfs; + u64 intr; + + /* Sync with mbox memory region */ + rmb(); + /* Handle VF interrupts */ if (vfs > 64) { intr = rvupf_read64(rvu, RVU_PF_VFPF_MBOX_INTX(1)); @@ -2865,7 +2892,7 @@ static int rvu_register_interrupts(struct rvu *rvu) /* Register mailbox interrupt handler */ sprintf(&rvu->irq_name[RVU_AF_INT_VEC_MBOX * NAME_SIZE], "RVUAF Mbox"); ret = request_irq(pci_irq_vector(rvu->pdev, RVU_AF_INT_VEC_MBOX), - rvu_mbox_intr_handler, 0, + rvu_mbox_pf_intr_handler, 0, &rvu->irq_name[RVU_AF_INT_VEC_MBOX * NAME_SIZE], rvu); if (ret) { dev_err(rvu->dev, @@ -3039,9 +3066,8 @@ static int rvu_flr_init(struct rvu *rvu) cfg | BIT_ULL(22)); } - rvu->flr_wq = alloc_workqueue("rvu_afpf_flr", - WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, - 1); + rvu->flr_wq = alloc_ordered_workqueue("rvu_afpf_flr", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!rvu->flr_wq) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 0b76dfa979d4..a3ae21398ca7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -109,6 +109,8 @@ struct rvu_block { u64 lfreset_reg; unsigned char name[NAME_SIZE]; struct rvu *rvu; + u64 cpt_flt_eng_map[3]; + u64 cpt_rcvrd_eng_map[3]; }; struct nix_mcast { @@ -506,6 +508,7 @@ struct rvu { struct ptp *ptp; int mcs_blk_cnt; + int cpt_pf_num; #ifdef CONFIG_DEBUG_FS struct rvu_debugfs rvu_dbg; @@ -520,6 +523,10 @@ struct rvu { struct list_head mcs_intrq_head; /* mcs interrupt queue lock */ spinlock_t mcs_intrq_lock; + /* CPT interrupt lock */ + spinlock_t cpt_intr_lock; + + struct mutex mbox_lock; /* Serialize mbox up and down msgs */ }; static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val) @@ -872,6 +879,7 @@ void rvu_cpt_unregister_interrupts(struct rvu *rvu); int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, int slot); int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc); +int rvu_cpt_init(struct rvu *rvu); #define NDC_AF_BANK_MASK GENMASK_ULL(7, 0) #define NDC_AF_BANK_LINE_MASK GENMASK_ULL(31, 16) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index bcb4385d0621..d1e6b12ecfa7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -232,7 +232,7 @@ static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu) struct cgx_link_user_info *linfo; struct cgx_link_info_msg *msg; unsigned long pfmap; - int err, pfid; + int pfid; linfo = &event->link_uinfo; pfmap = cgxlmac_to_pfmap(rvu, event->cgx_id, event->lmac_id); @@ -250,16 +250,22 @@ static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu) continue; } + mutex_lock(&rvu->mbox_lock); + /* Send mbox message to PF */ msg = otx2_mbox_alloc_msg_cgx_link_event(rvu, pfid); - if (!msg) + if (!msg) { + mutex_unlock(&rvu->mbox_lock); continue; + } + msg->link_info = *linfo; - otx2_mbox_msg_send(&rvu->afpf_wq_info.mbox_up, pfid); - err = otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pfid); - if (err) - dev_warn(rvu->dev, "notification to pf %d failed\n", - pfid); + + otx2_mbox_wait_for_zero(&rvu->afpf_wq_info.mbox_up, pfid); + + otx2_mbox_msg_send_up(&rvu->afpf_wq_info.mbox_up, pfid); + + mutex_unlock(&rvu->mbox_lock); } while (pfmap); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 38bbae5d9ae0..6fb02b93c171 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -37,34 +37,68 @@ (_rsp)->free_sts_##etype = free_sts; \ }) -static irqreturn_t rvu_cpt_af_flt_intr_handler(int irq, void *ptr) +static irqreturn_t cpt_af_flt_intr_handler(int vec, void *ptr) { struct rvu_block *block = ptr; struct rvu *rvu = block->rvu; int blkaddr = block->addr; - u64 reg0, reg1, reg2; + u64 reg, val; + int i, eng; + u8 grp; - reg0 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(0)); - reg1 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(1)); - if (!is_rvu_otx2(rvu)) { - reg2 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(2)); - dev_err_ratelimited(rvu->dev, - "Received CPTAF FLT irq : 0x%llx, 0x%llx, 0x%llx", - reg0, reg1, reg2); - } else { - dev_err_ratelimited(rvu->dev, - "Received CPTAF FLT irq : 0x%llx, 0x%llx", - reg0, reg1); + reg = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(vec)); + dev_err_ratelimited(rvu->dev, "Received CPTAF FLT%d irq : 0x%llx", vec, reg); + + i = -1; + while ((i = find_next_bit((unsigned long *)®, 64, i + 1)) < 64) { + switch (vec) { + case 0: + eng = i; + break; + case 1: + eng = i + 64; + break; + case 2: + eng = i + 128; + break; + } + grp = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_CTL2(eng)) & 0xFF; + /* Disable and enable the engine which triggers fault */ + rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL2(eng), 0x0); + val = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_CTL(eng)); + rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL(eng), val & ~1ULL); + + rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL2(eng), grp); + rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL(eng), val | 1ULL); + + spin_lock(&rvu->cpt_intr_lock); + block->cpt_flt_eng_map[vec] |= BIT_ULL(i); + val = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_STS(eng)); + val = val & 0x3; + if (val == 0x1 || val == 0x2) + block->cpt_rcvrd_eng_map[vec] |= BIT_ULL(i); + spin_unlock(&rvu->cpt_intr_lock); } - - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(0), reg0); - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(1), reg1); - if (!is_rvu_otx2(rvu)) - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(2), reg2); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(vec), reg); return IRQ_HANDLED; } +static irqreturn_t rvu_cpt_af_flt0_intr_handler(int irq, void *ptr) +{ + return cpt_af_flt_intr_handler(CPT_AF_INT_VEC_FLT0, ptr); +} + +static irqreturn_t rvu_cpt_af_flt1_intr_handler(int irq, void *ptr) +{ + return cpt_af_flt_intr_handler(CPT_AF_INT_VEC_FLT1, ptr); +} + +static irqreturn_t rvu_cpt_af_flt2_intr_handler(int irq, void *ptr) +{ + return cpt_af_flt_intr_handler(CPT_10K_AF_INT_VEC_FLT2, ptr); +} + static irqreturn_t rvu_cpt_af_rvu_intr_handler(int irq, void *ptr) { struct rvu_block *block = ptr; @@ -119,8 +153,10 @@ static void cpt_10k_unregister_interrupts(struct rvu_block *block, int off) int i; /* Disable all CPT AF interrupts */ - for (i = 0; i < CPT_10K_AF_INT_VEC_RVU; i++) - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), 0x1); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(0), ~0ULL); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(1), ~0ULL); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(2), 0xFFFF); + rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1); rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1); @@ -151,7 +187,7 @@ static void cpt_unregister_interrupts(struct rvu *rvu, int blkaddr) /* Disable all CPT AF interrupts */ for (i = 0; i < CPT_AF_INT_VEC_RVU; i++) - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), 0x1); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), ~0ULL); rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1); rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1); @@ -172,16 +208,31 @@ static int cpt_10k_register_interrupts(struct rvu_block *block, int off) { struct rvu *rvu = block->rvu; int blkaddr = block->addr; + irq_handler_t flt_fn; int i, ret; for (i = CPT_10K_AF_INT_VEC_FLT0; i < CPT_10K_AF_INT_VEC_RVU; i++) { sprintf(&rvu->irq_name[(off + i) * NAME_SIZE], "CPTAF FLT%d", i); + + switch (i) { + case CPT_10K_AF_INT_VEC_FLT0: + flt_fn = rvu_cpt_af_flt0_intr_handler; + break; + case CPT_10K_AF_INT_VEC_FLT1: + flt_fn = rvu_cpt_af_flt1_intr_handler; + break; + case CPT_10K_AF_INT_VEC_FLT2: + flt_fn = rvu_cpt_af_flt2_intr_handler; + break; + } ret = rvu_cpt_do_register_interrupt(block, off + i, - rvu_cpt_af_flt_intr_handler, - &rvu->irq_name[(off + i) * NAME_SIZE]); + flt_fn, &rvu->irq_name[(off + i) * NAME_SIZE]); if (ret) goto err; - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0x1); + if (i == CPT_10K_AF_INT_VEC_FLT2) + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0xFFFF); + else + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), ~0ULL); } ret = rvu_cpt_do_register_interrupt(block, off + CPT_10K_AF_INT_VEC_RVU, @@ -208,8 +259,8 @@ static int cpt_register_interrupts(struct rvu *rvu, int blkaddr) { struct rvu_hwinfo *hw = rvu->hw; struct rvu_block *block; + irq_handler_t flt_fn; int i, offs, ret = 0; - char irq_name[16]; if (!is_block_implemented(rvu->hw, blkaddr)) return 0; @@ -226,13 +277,20 @@ static int cpt_register_interrupts(struct rvu *rvu, int blkaddr) return cpt_10k_register_interrupts(block, offs); for (i = CPT_AF_INT_VEC_FLT0; i < CPT_AF_INT_VEC_RVU; i++) { - snprintf(irq_name, sizeof(irq_name), "CPTAF FLT%d", i); + sprintf(&rvu->irq_name[(offs + i) * NAME_SIZE], "CPTAF FLT%d", i); + switch (i) { + case CPT_AF_INT_VEC_FLT0: + flt_fn = rvu_cpt_af_flt0_intr_handler; + break; + case CPT_AF_INT_VEC_FLT1: + flt_fn = rvu_cpt_af_flt1_intr_handler; + break; + } ret = rvu_cpt_do_register_interrupt(block, offs + i, - rvu_cpt_af_flt_intr_handler, - irq_name); + flt_fn, &rvu->irq_name[(offs + i) * NAME_SIZE]); if (ret) goto err; - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0x1); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), ~0ULL); } ret = rvu_cpt_do_register_interrupt(block, offs + CPT_AF_INT_VEC_RVU, @@ -290,7 +348,7 @@ static int get_cpt_pf_num(struct rvu *rvu) static bool is_cpt_pf(struct rvu *rvu, u16 pcifunc) { - int cpt_pf_num = get_cpt_pf_num(rvu); + int cpt_pf_num = rvu->cpt_pf_num; if (rvu_get_pf(pcifunc) != cpt_pf_num) return false; @@ -302,7 +360,7 @@ static bool is_cpt_pf(struct rvu *rvu, u16 pcifunc) static bool is_cpt_vf(struct rvu *rvu, u16 pcifunc) { - int cpt_pf_num = get_cpt_pf_num(rvu); + int cpt_pf_num = rvu->cpt_pf_num; if (rvu_get_pf(pcifunc) != cpt_pf_num) return false; @@ -801,6 +859,64 @@ int rvu_mbox_handler_cpt_ctx_cache_sync(struct rvu *rvu, struct msg_req *req, return rvu_cpt_ctx_flush(rvu, req->hdr.pcifunc); } +int rvu_mbox_handler_cpt_lf_reset(struct rvu *rvu, struct cpt_lf_rst_req *req, + struct msg_rsp *rsp) +{ + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + int cptlf, blkaddr, ret; + u16 actual_slot; + u64 ctl, ctl2; + + blkaddr = rvu_get_blkaddr_from_slot(rvu, BLKTYPE_CPT, pcifunc, + req->slot, &actual_slot); + if (blkaddr < 0) + return CPT_AF_ERR_LF_INVALID; + + block = &rvu->hw->block[blkaddr]; + + cptlf = rvu_get_lf(rvu, block, pcifunc, actual_slot); + if (cptlf < 0) + return CPT_AF_ERR_LF_INVALID; + ctl = rvu_read64(rvu, blkaddr, CPT_AF_LFX_CTL(cptlf)); + ctl2 = rvu_read64(rvu, blkaddr, CPT_AF_LFX_CTL2(cptlf)); + + ret = rvu_lf_reset(rvu, block, cptlf); + if (ret) + dev_err(rvu->dev, "Failed to reset blkaddr %d LF%d\n", + block->addr, cptlf); + + rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL(cptlf), ctl); + rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL2(cptlf), ctl2); + + return 0; +} + +int rvu_mbox_handler_cpt_flt_eng_info(struct rvu *rvu, struct cpt_flt_eng_info_req *req, + struct cpt_flt_eng_info_rsp *rsp) +{ + struct rvu_block *block; + unsigned long flags; + int blkaddr, vec; + + blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); + if (blkaddr < 0) + return blkaddr; + + block = &rvu->hw->block[blkaddr]; + for (vec = 0; vec < CPT_10K_AF_INT_VEC_RVU; vec++) { + spin_lock_irqsave(&rvu->cpt_intr_lock, flags); + rsp->flt_eng_map[vec] = block->cpt_flt_eng_map[vec]; + rsp->rcvrd_eng_map[vec] = block->cpt_rcvrd_eng_map[vec]; + if (req->reset) { + block->cpt_flt_eng_map[vec] = 0x0; + block->cpt_rcvrd_eng_map[vec] = 0x0; + } + spin_unlock_irqrestore(&rvu->cpt_intr_lock, flags); + } + return 0; +} + static void cpt_rxc_teardown(struct rvu *rvu, int blkaddr) { struct cpt_rxc_time_cfg_req req; @@ -940,7 +1056,7 @@ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, int s static int cpt_inline_inb_lf_cmd_send(struct rvu *rvu, int blkaddr, int nix_blkaddr) { - int cpt_pf_num = get_cpt_pf_num(rvu); + int cpt_pf_num = rvu->cpt_pf_num; struct cpt_inst_lmtst_req *req; dma_addr_t res_daddr; int timeout = 3000; @@ -1084,3 +1200,12 @@ unlock: return 0; } + +int rvu_cpt_init(struct rvu *rvu) +{ + /* Retrieve CPT PF number */ + rvu->cpt_pf_num = get_cpt_pf_num(rvu); + spin_lock_init(&rvu->cpt_intr_lock); + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 0bcf3e559280..55639c133dd0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -437,6 +437,10 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, return; } + /* AF modifies given action iff PF/VF has requested for it */ + if ((entry->action & 0xFULL) != NIX_RX_ACTION_DEFAULT) + return; + /* copy VF default entry action to the VF mcam entry */ rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr, target_func); @@ -2678,18 +2682,17 @@ int rvu_mbox_handler_npc_mcam_alloc_entry(struct rvu *rvu, rsp->entry = NPC_MCAM_ENTRY_INVALID; rsp->free_count = 0; - /* Check if ref_entry is within range */ - if (req->priority && req->ref_entry >= mcam->bmap_entries) { - dev_err(rvu->dev, "%s: reference entry %d is out of range\n", - __func__, req->ref_entry); - return NPC_MCAM_INVALID_REQ; - } + /* Check if ref_entry is greater that the range + * then set it to max value. + */ + if (req->ref_entry > mcam->bmap_entries) + req->ref_entry = mcam->bmap_entries; /* ref_entry can't be '0' if requested priority is high. * Can't be last entry if requested priority is low. */ if ((!req->ref_entry && req->priority == NPC_MCAM_HIGHER_PRIO) || - ((req->ref_entry == (mcam->bmap_entries - 1)) && + ((req->ref_entry == mcam->bmap_entries) && req->priority == NPC_MCAM_LOWER_PRIO)) return NPC_MCAM_INVALID_REQ; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 0f896f606c3e..c00d6d67db51 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -930,8 +930,11 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) if (pfvf->ptp && qidx < pfvf->hw.tx_queues) { err = qmem_alloc(pfvf->dev, &sq->timestamps, qset->sqe_cnt, sizeof(*sq->timestamps)); - if (err) + if (err) { + kfree(sq->sg); + sq->sg = NULL; return err; + } } sq->head = 0; @@ -947,7 +950,14 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) sq->stats.bytes = 0; sq->stats.pkts = 0; - return pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura); + err = pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura); + if (err) { + kfree(sq->sg); + sq->sg = NULL; + return err; + } + + return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 44950c2542bb..c15d1864a637 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -785,7 +785,7 @@ static inline int otx2_sync_mbox_up_msg(struct mbox *mbox, int devid) if (!otx2_mbox_nonempty(&mbox->mbox_up, devid)) return 0; - otx2_mbox_msg_send(&mbox->mbox_up, devid); + otx2_mbox_msg_send_up(&mbox->mbox_up, devid); err = otx2_mbox_wait_for_rsp(&mbox->mbox_up, devid); if (err) return err; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index a2d8ac620405..7e2c30927c31 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -272,8 +272,7 @@ static int otx2_pf_flr_init(struct otx2_nic *pf, int num_vfs) { int vf; - pf->flr_wq = alloc_workqueue("otx2_pf_flr_wq", - WQ_UNBOUND | WQ_HIGHPRI, 1); + pf->flr_wq = alloc_ordered_workqueue("otx2_pf_flr_wq", WQ_HIGHPRI); if (!pf->flr_wq) return -ENOMEM; @@ -292,8 +291,8 @@ static int otx2_pf_flr_init(struct otx2_nic *pf, int num_vfs) return 0; } -static void otx2_queue_work(struct mbox *mw, struct workqueue_struct *mbox_wq, - int first, int mdevs, u64 intr, int type) +static void otx2_queue_vf_work(struct mbox *mw, struct workqueue_struct *mbox_wq, + int first, int mdevs, u64 intr) { struct otx2_mbox_dev *mdev; struct otx2_mbox *mbox; @@ -307,40 +306,26 @@ static void otx2_queue_work(struct mbox *mw, struct workqueue_struct *mbox_wq, mbox = &mw->mbox; mdev = &mbox->dev[i]; - if (type == TYPE_PFAF) - otx2_sync_mbox_bbuf(mbox, i); hdr = mdev->mbase + mbox->rx_start; /* The hdr->num_msgs is set to zero immediately in the interrupt - * handler to ensure that it holds a correct value next time - * when the interrupt handler is called. - * pf->mbox.num_msgs holds the data for use in pfaf_mbox_handler - * pf>mbox.up_num_msgs holds the data for use in - * pfaf_mbox_up_handler. + * handler to ensure that it holds a correct value next time + * when the interrupt handler is called. pf->mw[i].num_msgs + * holds the data for use in otx2_pfvf_mbox_handler and + * pf->mw[i].up_num_msgs holds the data for use in + * otx2_pfvf_mbox_up_handler. */ if (hdr->num_msgs) { mw[i].num_msgs = hdr->num_msgs; hdr->num_msgs = 0; - if (type == TYPE_PFAF) - memset(mbox->hwbase + mbox->rx_start, 0, - ALIGN(sizeof(struct mbox_hdr), - sizeof(u64))); - queue_work(mbox_wq, &mw[i].mbox_wrk); } mbox = &mw->mbox_up; mdev = &mbox->dev[i]; - if (type == TYPE_PFAF) - otx2_sync_mbox_bbuf(mbox, i); hdr = mdev->mbase + mbox->rx_start; if (hdr->num_msgs) { mw[i].up_num_msgs = hdr->num_msgs; hdr->num_msgs = 0; - if (type == TYPE_PFAF) - memset(mbox->hwbase + mbox->rx_start, 0, - ALIGN(sizeof(struct mbox_hdr), - sizeof(u64))); - queue_work(mbox_wq, &mw[i].mbox_up_wrk); } } @@ -356,8 +341,10 @@ static void otx2_forward_msg_pfvf(struct otx2_mbox_dev *mdev, /* Msgs are already copied, trigger VF's mbox irq */ smp_wmb(); + otx2_mbox_wait_for_zero(pfvf_mbox, devid); + offset = pfvf_mbox->trigger | (devid << pfvf_mbox->tr_shift); - writeq(1, (void __iomem *)pfvf_mbox->reg_base + offset); + writeq(MBOX_DOWN_MSG, (void __iomem *)pfvf_mbox->reg_base + offset); /* Restore VF's mbox bounce buffer region address */ src_mdev->mbase = bbuf_base; @@ -547,7 +534,7 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work) end: offset = mbox->rx_start + msg->next_msgoff; if (mdev->msgs_acked == (vf_mbox->up_num_msgs - 1)) - __otx2_mbox_reset(mbox, 0); + __otx2_mbox_reset(mbox, vf_idx); mdev->msgs_acked++; } } @@ -564,8 +551,7 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) if (vfs > 64) { intr = otx2_read64(pf, RVU_PF_VFPF_MBOX_INTX(1)); otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(1), intr); - otx2_queue_work(mbox, pf->mbox_pfvf_wq, 64, vfs, intr, - TYPE_PFVF); + otx2_queue_vf_work(mbox, pf->mbox_pfvf_wq, 64, vfs, intr); if (intr) trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr); vfs = 64; @@ -574,7 +560,7 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) intr = otx2_read64(pf, RVU_PF_VFPF_MBOX_INTX(0)); otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(0), intr); - otx2_queue_work(mbox, pf->mbox_pfvf_wq, 0, vfs, intr, TYPE_PFVF); + otx2_queue_vf_work(mbox, pf->mbox_pfvf_wq, 0, vfs, intr); if (intr) trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr); @@ -599,7 +585,7 @@ static int otx2_pfvf_mbox_init(struct otx2_nic *pf, int numvfs) pf->mbox_pfvf_wq = alloc_workqueue("otx2_pfvf_mailbox", WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + WQ_MEM_RECLAIM, 0); if (!pf->mbox_pfvf_wq) return -ENOMEM; @@ -822,20 +808,22 @@ static void otx2_pfaf_mbox_handler(struct work_struct *work) struct mbox *af_mbox; struct otx2_nic *pf; int offset, id; + u16 num_msgs; af_mbox = container_of(work, struct mbox, mbox_wrk); mbox = &af_mbox->mbox; mdev = &mbox->dev[0]; rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + num_msgs = rsp_hdr->num_msgs; offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); pf = af_mbox->pfvf; - for (id = 0; id < af_mbox->num_msgs; id++) { + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2_process_pfaf_mbox_msg(pf, msg); offset = mbox->rx_start + msg->next_msgoff; - if (mdev->msgs_acked == (af_mbox->num_msgs - 1)) + if (mdev->msgs_acked == (num_msgs - 1)) __otx2_mbox_reset(mbox, 0); mdev->msgs_acked++; } @@ -946,12 +934,14 @@ static void otx2_pfaf_mbox_up_handler(struct work_struct *work) int offset, id, devid = 0; struct mbox_hdr *rsp_hdr; struct mbox_msghdr *msg; + u16 num_msgs; rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + num_msgs = rsp_hdr->num_msgs; offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); - for (id = 0; id < af_mbox->up_num_msgs; id++) { + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); devid = msg->pcifunc & RVU_PFVF_FUNC_MASK; @@ -960,10 +950,11 @@ static void otx2_pfaf_mbox_up_handler(struct work_struct *work) otx2_process_mbox_msg_up(pf, msg); offset = mbox->rx_start + msg->next_msgoff; } - if (devid) { + /* Forward to VF iff VFs are really present */ + if (devid && pci_num_vf(pf->pdev)) { otx2_forward_vf_mbox_msgs(pf, &pf->mbox.mbox_up, MBOX_DIR_PFVF_UP, devid - 1, - af_mbox->up_num_msgs); + num_msgs); return; } @@ -973,16 +964,49 @@ static void otx2_pfaf_mbox_up_handler(struct work_struct *work) static irqreturn_t otx2_pfaf_mbox_intr_handler(int irq, void *pf_irq) { struct otx2_nic *pf = (struct otx2_nic *)pf_irq; - struct mbox *mbox; + struct mbox *mw = &pf->mbox; + struct otx2_mbox_dev *mdev; + struct otx2_mbox *mbox; + struct mbox_hdr *hdr; + u64 mbox_data; /* Clear the IRQ */ otx2_write64(pf, RVU_PF_INT, BIT_ULL(0)); - mbox = &pf->mbox; - trace_otx2_msg_interrupt(mbox->mbox.pdev, "AF to PF", BIT_ULL(0)); + mbox_data = otx2_read64(pf, RVU_PF_PFAF_MBOX0); - otx2_queue_work(mbox, pf->mbox_wq, 0, 1, 1, TYPE_PFAF); + if (mbox_data & MBOX_UP_MSG) { + mbox_data &= ~MBOX_UP_MSG; + otx2_write64(pf, RVU_PF_PFAF_MBOX0, mbox_data); + + mbox = &mw->mbox_up; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); + + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) + queue_work(pf->mbox_wq, &mw->mbox_up_wrk); + + trace_otx2_msg_interrupt(pf->pdev, "UP message from AF to PF", + BIT_ULL(0)); + } + + if (mbox_data & MBOX_DOWN_MSG) { + mbox_data &= ~MBOX_DOWN_MSG; + otx2_write64(pf, RVU_PF_PFAF_MBOX0, mbox_data); + + mbox = &mw->mbox; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); + + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) + queue_work(pf->mbox_wq, &mw->mbox_wrk); + + trace_otx2_msg_interrupt(pf->pdev, "DOWN reply from AF to PF", + BIT_ULL(0)); + } return IRQ_HANDLED; } @@ -1063,9 +1087,8 @@ static int otx2_pfaf_mbox_init(struct otx2_nic *pf) int err; mbox->pfvf = pf; - pf->mbox_wq = alloc_workqueue("otx2_pfaf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + pf->mbox_wq = alloc_ordered_workqueue("otx2_pfaf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!pf->mbox_wq) return -ENOMEM; @@ -3030,6 +3053,7 @@ static void otx2_vf_link_event_task(struct work_struct *work) struct otx2_vf_config *config; struct cgx_link_info_msg *req; struct mbox_msghdr *msghdr; + struct delayed_work *dwork; struct otx2_nic *pf; int vf_idx; @@ -3038,10 +3062,24 @@ static void otx2_vf_link_event_task(struct work_struct *work) vf_idx = config - config->pf->vf_configs; pf = config->pf; + if (config->intf_down) + return; + + mutex_lock(&pf->mbox.lock); + + dwork = &config->link_event_work; + + if (!otx2_mbox_wait_for_zero(&pf->mbox_pfvf[0].mbox_up, vf_idx)) { + schedule_delayed_work(dwork, msecs_to_jiffies(100)); + mutex_unlock(&pf->mbox.lock); + return; + } + msghdr = otx2_mbox_alloc_msg_rsp(&pf->mbox_pfvf[0].mbox_up, vf_idx, sizeof(*req), sizeof(struct msg_rsp)); if (!msghdr) { dev_err(pf->dev, "Failed to create VF%d link event\n", vf_idx); + mutex_unlock(&pf->mbox.lock); return; } @@ -3050,7 +3088,11 @@ static void otx2_vf_link_event_task(struct work_struct *work) req->hdr.sig = OTX2_MBOX_REQ_SIG; memcpy(&req->link_info, &pf->linfo, sizeof(req->link_info)); + otx2_mbox_wait_for_zero(&pf->mbox_pfvf[0].mbox_up, vf_idx); + otx2_sync_mbox_up_msg(&pf->mbox_pfvf[0], vf_idx); + + mutex_unlock(&pf->mbox.lock); } static int otx2_sriov_enable(struct pci_dev *pdev, int numvfs) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 404855bccb4b..dcb8190de240 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -89,16 +89,20 @@ static void otx2vf_vfaf_mbox_handler(struct work_struct *work) struct otx2_mbox *mbox; struct mbox *af_mbox; int offset, id; + u16 num_msgs; af_mbox = container_of(work, struct mbox, mbox_wrk); mbox = &af_mbox->mbox; mdev = &mbox->dev[0]; rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - if (af_mbox->num_msgs == 0) + num_msgs = rsp_hdr->num_msgs; + + if (num_msgs == 0) return; + offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); - for (id = 0; id < af_mbox->num_msgs; id++) { + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2vf_process_vfaf_mbox_msg(af_mbox->pfvf, msg); offset = mbox->rx_start + msg->next_msgoff; @@ -151,6 +155,7 @@ static void otx2vf_vfaf_mbox_up_handler(struct work_struct *work) struct mbox *vf_mbox; struct otx2_nic *vf; int offset, id; + u16 num_msgs; vf_mbox = container_of(work, struct mbox, mbox_up_wrk); vf = vf_mbox->pfvf; @@ -158,12 +163,14 @@ static void otx2vf_vfaf_mbox_up_handler(struct work_struct *work) mdev = &mbox->dev[0]; rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - if (vf_mbox->up_num_msgs == 0) + num_msgs = rsp_hdr->num_msgs; + + if (num_msgs == 0) return; offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); - for (id = 0; id < vf_mbox->up_num_msgs; id++) { + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2vf_process_mbox_msg_up(vf, msg); offset = mbox->rx_start + msg->next_msgoff; @@ -178,40 +185,48 @@ static irqreturn_t otx2vf_vfaf_mbox_intr_handler(int irq, void *vf_irq) struct otx2_mbox_dev *mdev; struct otx2_mbox *mbox; struct mbox_hdr *hdr; + u64 mbox_data; /* Clear the IRQ */ otx2_write64(vf, RVU_VF_INT, BIT_ULL(0)); + mbox_data = otx2_read64(vf, RVU_VF_VFPF_MBOX0); + /* Read latest mbox data */ smp_rmb(); - /* Check for PF => VF response messages */ - mbox = &vf->mbox.mbox; - mdev = &mbox->dev[0]; - otx2_sync_mbox_bbuf(mbox, 0); + if (mbox_data & MBOX_DOWN_MSG) { + mbox_data &= ~MBOX_DOWN_MSG; + otx2_write64(vf, RVU_VF_VFPF_MBOX0, mbox_data); - trace_otx2_msg_interrupt(mbox->pdev, "PF to VF", BIT_ULL(0)); + /* Check for PF => VF response messages */ + mbox = &vf->mbox.mbox; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); - hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - if (hdr->num_msgs) { - vf->mbox.num_msgs = hdr->num_msgs; - hdr->num_msgs = 0; - memset(mbox->hwbase + mbox->rx_start, 0, - ALIGN(sizeof(struct mbox_hdr), sizeof(u64))); - queue_work(vf->mbox_wq, &vf->mbox.mbox_wrk); + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) + queue_work(vf->mbox_wq, &vf->mbox.mbox_wrk); + + trace_otx2_msg_interrupt(mbox->pdev, "DOWN reply from PF to VF", + BIT_ULL(0)); } - /* Check for PF => VF notification messages */ - mbox = &vf->mbox.mbox_up; - mdev = &mbox->dev[0]; - otx2_sync_mbox_bbuf(mbox, 0); - hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - if (hdr->num_msgs) { - vf->mbox.up_num_msgs = hdr->num_msgs; - hdr->num_msgs = 0; - memset(mbox->hwbase + mbox->rx_start, 0, - ALIGN(sizeof(struct mbox_hdr), sizeof(u64))); - queue_work(vf->mbox_wq, &vf->mbox.mbox_up_wrk); + if (mbox_data & MBOX_UP_MSG) { + mbox_data &= ~MBOX_UP_MSG; + otx2_write64(vf, RVU_VF_VFPF_MBOX0, mbox_data); + + /* Check for PF => VF notification messages */ + mbox = &vf->mbox.mbox_up; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); + + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) + queue_work(vf->mbox_wq, &vf->mbox.mbox_up_wrk); + + trace_otx2_msg_interrupt(mbox->pdev, "UP message from PF to VF", + BIT_ULL(0)); } return IRQ_HANDLED; @@ -293,9 +308,8 @@ static int otx2vf_vfaf_mbox_init(struct otx2_nic *vf) int err; mbox->pfvf = vf; - vf->mbox_wq = alloc_workqueue("otx2_vfaf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + vf->mbox_wq = alloc_ordered_workqueue("otx2_vfaf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!vf->mbox_wq) return -ENOMEM; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 17e6ac4445af..fecf3dd22dfa 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -561,8 +561,7 @@ static int mtk_mac_finish(struct phylink_config *config, unsigned int mode, mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); mcr_new = mcr_cur; mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | - MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK | - MAC_MCR_RX_FIFO_CLR_DIS; + MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_RX_FIFO_CLR_DIS; /* Only update control register when needed! */ if (mcr_new != mcr_cur) @@ -610,7 +609,7 @@ static void mtk_mac_link_down(struct phylink_config *config, unsigned int mode, phylink_config); u32 mcr = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); - mcr &= ~(MAC_MCR_TX_EN | MAC_MCR_RX_EN); + mcr &= ~(MAC_MCR_TX_EN | MAC_MCR_RX_EN | MAC_MCR_FORCE_LINK); mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); } @@ -649,7 +648,7 @@ static void mtk_mac_link_up(struct phylink_config *config, if (rx_pause) mcr |= MAC_MCR_FORCE_RX_FC; - mcr |= MAC_MCR_TX_EN | MAC_MCR_RX_EN; + mcr |= MAC_MCR_TX_EN | MAC_MCR_RX_EN | MAC_MCR_FORCE_LINK; mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); } diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index d6eed204574a..c64211e22ae7 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -811,7 +811,7 @@ void mtk_ppe_start(struct mtk_ppe *ppe) MTK_PPE_KEEPALIVE_DISABLE) | FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) | FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE, - MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) | + MTK_PPE_SCAN_MODE_CHECK_AGE) | FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM, MTK_PPE_ENTRIES_SHIFT); if (MTK_HAS_CAPS(ppe->eth->soc->caps, MTK_NETSYS_V2)) @@ -895,17 +895,21 @@ int mtk_ppe_stop(struct mtk_ppe *ppe) mtk_ppe_cache_enable(ppe, false); - /* disable offload engine */ - ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN); - ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0); - /* disable aging */ val = MTK_PPE_TB_CFG_AGE_NON_L4 | MTK_PPE_TB_CFG_AGE_UNBIND | MTK_PPE_TB_CFG_AGE_TCP | MTK_PPE_TB_CFG_AGE_UDP | - MTK_PPE_TB_CFG_AGE_TCP_FIN; + MTK_PPE_TB_CFG_AGE_TCP_FIN | + MTK_PPE_TB_CFG_SCAN_MODE; ppe_clear(ppe, MTK_PPE_TB_CFG, val); - return mtk_ppe_wait_busy(ppe); + if (mtk_ppe_wait_busy(ppe)) + return -ETIMEDOUT; + + /* disable offload engine */ + ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN); + ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0); + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index e1283531e0b8..671adbad0a40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -436,6 +436,7 @@ static int fs_any_create_groups(struct mlx5e_flow_table *ft) in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { kfree(ft->g); + ft->g = NULL; kvfree(in); return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 29dd3a04c154..d3de1b7a80bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -990,8 +990,8 @@ void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); bool allow_swp; - allow_swp = - mlx5_geneve_tx_allowed(mdev) || !!mlx5_ipsec_device_caps(mdev); + allow_swp = mlx5_geneve_tx_allowed(mdev) || + (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO); mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); MLX5_SET(sqc, sqc, allow_swp, allow_swp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 16bcceec16c4..785f188148d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -34,7 +34,6 @@ #ifndef __MLX5E_IPSEC_H__ #define __MLX5E_IPSEC_H__ -#ifdef CONFIG_MLX5_EN_IPSEC #include #include @@ -146,6 +145,7 @@ struct mlx5e_ipsec_sa_entry { struct mlx5e_ipsec_modify_state_work modify_work; }; +#ifdef CONFIG_MLX5_EN_IPSEC int mlx5e_ipsec_init(struct mlx5e_priv *priv); void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv); void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index dc0a0a27ac84..58eacba6de8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -255,11 +255,13 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in || !ft->g) { - kfree(ft->g); - kvfree(in); + if (!ft->g) return -ENOMEM; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free_g; } mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); @@ -279,7 +281,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, break; default: err = -EINVAL; - goto out; + goto err_free_in; } switch (type) { @@ -301,7 +303,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, break; default: err = -EINVAL; - goto out; + goto err_free_in; } MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); @@ -310,7 +312,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err; + goto err_clean_group; ft->num_groups++; memset(in, 0, inlen); @@ -319,18 +321,20 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err; + goto err_clean_group; ft->num_groups++; kvfree(in); return 0; -err: +err_clean_group: err = PTR_ERR(ft->g[ft->num_groups]); ft->g[ft->num_groups] = NULL; -out: +err_free_in: kvfree(in); - +err_free_g: + kfree(ft->g); + ft->g = NULL; return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c index c971ff04dd04..c215252f2f53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c @@ -98,7 +98,7 @@ static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data) mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); - MLX5_SET(cqc, cqc, cq_period_mode, DIM_CQ_PERIOD_MODE_START_FROM_EQE); + MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index a3e7602b044e..bf7517725d8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -673,6 +673,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, switch (action_type) { case DR_ACTION_TYP_DROP: attr.final_icm_addr = nic_dmn->drop_icm_addr; + attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48; break; case DR_ACTION_TYP_FT: dest_action = action; @@ -761,11 +762,17 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, action->sampler->tx_icm_addr; break; case DR_ACTION_TYP_VPORT: - attr.hit_gvmi = action->vport->caps->vhca_gvmi; - dest_action = action; - attr.final_icm_addr = rx_rule ? - action->vport->caps->icm_address_rx : - action->vport->caps->icm_address_tx; + if (unlikely(rx_rule && action->vport->caps->num == MLX5_VPORT_UPLINK)) { + /* can't go to uplink on RX rule - dropping instead */ + attr.final_icm_addr = nic_dmn->drop_icm_addr; + attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48; + } else { + attr.hit_gvmi = action->vport->caps->vhca_gvmi; + dest_action = action; + attr.final_icm_addr = rx_rule ? + action->vport->caps->icm_address_rx : + action->vport->caps->icm_address_tx; + } break; case DR_ACTION_TYP_POP_VLAN: if (!rx_rule && !(dmn->ste_ctx->actions_caps & diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 3b9ba8fa247a..41eac7dfb67e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -52,8 +52,10 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, max_regions = max_tcam_regions; tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL); - if (!tcam->used_regions) - return -ENOMEM; + if (!tcam->used_regions) { + err = -ENOMEM; + goto err_alloc_used_regions; + } tcam->max_regions = max_regions; max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS); @@ -65,6 +67,8 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, tcam->max_groups = max_groups; tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUP_SIZE); + tcam->max_group_size = min_t(unsigned int, tcam->max_group_size, + MLXSW_REG_PAGT_ACL_MAX_NUM); err = ops->init(mlxsw_sp, tcam->priv, tcam); if (err) @@ -76,6 +80,8 @@ err_tcam_init: bitmap_free(tcam->used_groups); err_alloc_used_groups: bitmap_free(tcam->used_regions); +err_alloc_used_regions: + mutex_destroy(&tcam->lock); return err; } @@ -84,10 +90,10 @@ void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - mutex_destroy(&tcam->lock); ops->fini(mlxsw_sp, tcam->priv); bitmap_free(tcam->used_groups); bitmap_free(tcam->used_regions); + mutex_destroy(&tcam->lock); } int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c index 41fa2523d91d..5f2cd9a8cf8f 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c @@ -37,19 +37,24 @@ static void lan966x_lag_set_aggr_pgids(struct lan966x *lan966x) /* Now, set PGIDs for each active LAG */ for (lag = 0; lag < lan966x->num_phys_ports; ++lag) { - struct net_device *bond = lan966x->ports[lag]->bond; + struct lan966x_port *port = lan966x->ports[lag]; int num_active_ports = 0; + struct net_device *bond; unsigned long bond_mask; u8 aggr_idx[16]; - if (!bond || (visited & BIT(lag))) + if (!port || !port->bond || (visited & BIT(lag))) continue; + bond = port->bond; bond_mask = lan966x_lag_get_mask(lan966x, bond); for_each_set_bit(p, &bond_mask, lan966x->num_phys_ports) { struct lan966x_port *port = lan966x->ports[p]; + if (!port) + continue; + lan_wr(ANA_PGID_PGID_SET(bond_mask), lan966x, ANA_PGID(p)); if (port->lag_tx_active) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 0050fcb988b7..8cc5172833a9 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -168,9 +168,10 @@ static void lan966x_port_link_up(struct lan966x_port *port) lan966x_taprio_speed_set(port, config->speed); /* Also the GIGA_MODE_ENA(1) needs to be set regardless of the - * port speed for QSGMII ports. + * port speed for QSGMII or SGMII ports. */ - if (phy_interface_num_ports(config->portmode) == 4) + if (phy_interface_num_ports(config->portmode) == 4 || + config->portmode == PHY_INTERFACE_MODE_SGMII) mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA_SET(1); lan_wr(config->duplex | mode, diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c index 4af285918ea2..75868b3f548e 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c @@ -347,10 +347,10 @@ int sparx5_del_mact_entry(struct sparx5 *sparx5, list) { if ((vid == 0 || mact_entry->vid == vid) && ether_addr_equal(addr, mact_entry->mac)) { + sparx5_mact_forget(sparx5, addr, mact_entry->vid); + list_del(&mact_entry->list); devm_kfree(sparx5->dev, mact_entry); - - sparx5_mact_forget(sparx5, addr, mact_entry->vid); } } mutex_unlock(&sparx5->mact_lock); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 3423c95cc84a..7031f41287e0 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -744,6 +744,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sparx5); sparx5->pdev = pdev; sparx5->dev = &pdev->dev; + spin_lock_init(&sparx5->tx_lock); /* Do switch core reset if available */ reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch"); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 7a83222caa73..cb3173d2b0e8 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -278,6 +278,7 @@ struct sparx5 { int xtr_irq; /* Frame DMA */ int fdma_irq; + spinlock_t tx_lock; /* lock for frame transmission */ struct sparx5_rx rx; struct sparx5_tx tx; /* PTP */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 6db6ac6a3bbc..ac7e1cffbcec 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -244,10 +244,12 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) } skb_tx_timestamp(skb); + spin_lock(&sparx5->tx_lock); if (sparx5->fdma_irq > 0) ret = sparx5_fdma_xmit(sparx5, ifh, skb); else ret = sparx5_inject(sparx5, ifh, skb, dev); + spin_unlock(&sparx5->tx_lock); if (ret == -EBUSY) goto busy; diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index f7492be452ae..497766ecdd91 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1243,7 +1243,7 @@ static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt, /* Checks that the chain_index of the filter matches the * chain_index of the GOTO action. */ - if (post_ct_entry->chain_index != pre_ct_entry->chain_index) + if (post_ct_entry->chain_index != pre_ct_entry->goto_chain_index) return -EINVAL; err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry); @@ -1379,10 +1379,30 @@ static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_ mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask); return; + /* Both struct tcphdr and struct udphdr start with + * __be16 source; + * __be16 dest; + * so we can use the same code for both. + */ case FLOW_ACT_MANGLE_HDR_TYPE_TCP: case FLOW_ACT_MANGLE_HDR_TYPE_UDP: - mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val); - mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask); + if (mangle_action->mangle.offset == offsetof(struct tcphdr, source)) { + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val << 16); + /* The mask of mangle action is inverse mask, + * so clear the dest tp port with 0xFFFF to + * instead of rotate-left operation. + */ + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask << 16 | 0xFFFF); + } + if (mangle_action->mangle.offset == offsetof(struct tcphdr, dest)) { + mangle_action->mangle.offset = 0; + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val); + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask); + } return; default: @@ -1756,7 +1776,8 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, if (IS_ERR(ct_entry)) return PTR_ERR(ct_entry); ct_entry->type = CT_TYPE_PRE_CT; - ct_entry->chain_index = ct_goto->chain_index; + ct_entry->chain_index = flow->common.chain_index; + ct_entry->goto_chain_index = ct_goto->chain_index; list_add(&ct_entry->list_node, &zt->pre_ct_list); zt->pre_ct_count++; @@ -1776,9 +1797,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, { struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct nfp_fl_ct_flow_entry *ct_entry; + struct flow_action_entry *ct_goto; struct nfp_fl_ct_zone_entry *zt; + struct flow_action_entry *act; bool wildcarded = false; struct flow_match_ct ct; + int i; + + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_REDIRECT_INGRESS: + case FLOW_ACTION_MIRRED: + case FLOW_ACTION_MIRRED_INGRESS: + if (act->dev->rtnl_link_ops && + !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: out port is openvswitch internal port"); + return -EOPNOTSUPP; + } + break; + default: + break; + } + } flow_rule_match_ct(rule, &ct); if (!ct.mask->ct_zone) { @@ -1803,6 +1845,8 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, ct_entry->type = CT_TYPE_POST_CT; ct_entry->chain_index = flow->common.chain_index; + ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO); + ct_entry->goto_chain_index = ct_goto ? ct_goto->chain_index : 0; list_add(&ct_entry->list_node, &zt->post_ct_list); zt->post_ct_count++; diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h index 762c0b36e269..9440ab776ece 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h @@ -112,6 +112,7 @@ enum nfp_nfp_layer_name { * @cookie: Flow cookie, same as original TC flow, used as key * @list_node: Used by the list * @chain_index: Chain index of the original flow + * @goto_chain_index: goto chain index of the flow * @netdev: netdev structure. * @type: Type of pre-entry from enum ct_entry_type * @zt: Reference to the zone table this belongs to @@ -125,6 +126,7 @@ struct nfp_fl_ct_flow_entry { unsigned long cookie; struct list_head list_node; u32 chain_index; + u32 goto_chain_index; enum ct_entry_type type; struct net_device *netdev; struct nfp_fl_ct_zone_entry *zt; diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c index e92860e20a24..c6a2c302a8c8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c @@ -308,6 +308,11 @@ static void nfp_fl_lag_do_work(struct work_struct *work) acti_netdevs = kmalloc_array(entry->slave_cnt, sizeof(*acti_netdevs), GFP_KERNEL); + if (!acti_netdevs) { + schedule_delayed_work(&lag->work, + NFP_FL_LAG_DELAY); + continue; + } /* Include sanity check in the loop. It may be that a bond has * changed between processing the last notification and the diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 52f67157bd0f..a3c52c91a575 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -980,7 +980,7 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, u16 nfp_mac_idx = 0; entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); - if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) { + if (entry && (nfp_tunnel_is_mac_idx_global(entry->index) || netif_is_lag_port(netdev))) { if (entry->bridge_count || !nfp_flower_is_supported_bridge(netdev)) { nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c index 33b4c2856316..3f10c5365c80 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -537,11 +537,13 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface) const u32 barcfg_msix_general = NFP_PCIE_BAR_PCIE2CPP_MapType( NFP_PCIE_BAR_PCIE2CPP_MapType_GENERAL) | - NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT; + NFP_PCIE_BAR_PCIE2CPP_LengthSelect( + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT); const u32 barcfg_msix_xpb = NFP_PCIE_BAR_PCIE2CPP_MapType( NFP_PCIE_BAR_PCIE2CPP_MapType_BULK) | - NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT | + NFP_PCIE_BAR_PCIE2CPP_LengthSelect( + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT) | NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress( NFP_CPP_TARGET_ISLAND_XPB); const u32 barcfg_explicit[4] = { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index ce436e97324a..4b9caec6eb9b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -380,6 +380,10 @@ static void ionic_remove(struct pci_dev *pdev) del_timer_sync(&ionic->watchdog_timer); if (ionic->lif) { + /* prevent adminq cmds if already known as down */ + if (test_and_clear_bit(IONIC_LIF_F_FW_RESET, ionic->lif->state)) + set_bit(IONIC_LIF_F_FW_STOPPING, ionic->lif->state); + ionic_lif_unregister(ionic->lif); ionic_devlink_unregister(ionic); ionic_lif_deinit(ionic->lif); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 344a3924627d..7adfcd7c2f3a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -293,6 +293,7 @@ void ionic_dev_cmd_comp(struct ionic_dev *idev, union ionic_dev_cmd_comp *comp) void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd) { + idev->opcode = cmd->cmd.opcode; memcpy_toio(&idev->dev_cmd_regs->cmd, cmd, sizeof(*cmd)); iowrite32(0, &idev->dev_cmd_regs->done); iowrite32(1, &idev->dev_cmd_regs->doorbell); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index 13dfcf9f75da..c10da629ef6e 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -150,6 +150,7 @@ struct ionic_dev { bool fw_hb_ready; bool fw_status_ready; u8 fw_generation; + u8 opcode; u64 __iomem *db_pages; dma_addr_t phy_db_pages; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index f7634884c750..fcc3faecb060 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3008,6 +3008,9 @@ static void ionic_lif_reset(struct ionic_lif *lif) { struct ionic_dev *idev = &lif->ionic->idev; + if (!ionic_is_fw_running(idev)) + return; + mutex_lock(&lif->ionic->dev_cmd_lock); ionic_dev_cmd_lif_reset(idev, lif->index); ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 79272f5f380c..d2038ff316ca 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -410,22 +410,28 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, do_msg); } -int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) +static int __ionic_adminq_post_wait(struct ionic_lif *lif, + struct ionic_admin_ctx *ctx, + const bool do_msg) { int err; + if (!ionic_is_fw_running(&lif->ionic->idev)) + return 0; + err = ionic_adminq_post(lif, ctx); - return ionic_adminq_wait(lif, ctx, err, true); + return ionic_adminq_wait(lif, ctx, err, do_msg); +} + +int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) +{ + return __ionic_adminq_post_wait(lif, ctx, true); } int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) { - int err; - - err = ionic_adminq_post(lif, ctx); - - return ionic_adminq_wait(lif, ctx, err, false); + return __ionic_adminq_post_wait(lif, ctx, false); } static void ionic_dev_cmd_clean(struct ionic *ionic) @@ -465,7 +471,7 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds, */ max_wait = jiffies + (max_seconds * HZ); try_again: - opcode = readb(&idev->dev_cmd_regs->cmd.cmd.opcode); + opcode = idev->opcode; start_time = jiffies; for (fw_up = ionic_is_fw_running(idev); !done && fw_up && time_before(jiffies, max_wait); diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 0dbcb9c83f46..a3209446bb93 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -217,6 +217,7 @@ struct stmmac_safety_stats { unsigned long mac_errors[32]; unsigned long mtl_errors[32]; unsigned long dma_errors[32]; + unsigned long dma_dpp_errors[32]; }; /* Number of fields in Safety Stats */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 880a75bf2eb1..8748c37e9dac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -282,6 +282,8 @@ #define XGMAC_RXCEIE BIT(4) #define XGMAC_TXCEIE BIT(0) #define XGMAC_MTL_ECC_INT_STATUS 0x000010cc +#define XGMAC_MTL_DPP_CONTROL 0x000010e0 +#define XGMAC_DPP_DISABLE BIT(0) #define XGMAC_MTL_TXQ_OPMODE(x) (0x00001100 + (0x80 * (x))) #define XGMAC_TQS GENMASK(25, 16) #define XGMAC_TQS_SHIFT 16 @@ -364,6 +366,7 @@ #define XGMAC_DCEIE BIT(1) #define XGMAC_TCEIE BIT(0) #define XGMAC_DMA_ECC_INT_STATUS 0x0000306c +#define XGMAC_DMA_DPP_INT_STATUS 0x00003074 #define XGMAC_DMA_CH_CONTROL(x) (0x00003100 + (0x80 * (x))) #define XGMAC_SPH BIT(24) #define XGMAC_PBLx8 BIT(16) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index c2181c277291..ec1616ffbfa7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -789,6 +789,44 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= { { false, "UNKNOWN", "Unknown Error" }, /* 31 */ }; +#define DPP_RX_ERR "Read Rx Descriptor Parity checker Error" +#define DPP_TX_ERR "Read Tx Descriptor Parity checker Error" + +static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = { + { true, "TDPES0", DPP_TX_ERR }, + { true, "TDPES1", DPP_TX_ERR }, + { true, "TDPES2", DPP_TX_ERR }, + { true, "TDPES3", DPP_TX_ERR }, + { true, "TDPES4", DPP_TX_ERR }, + { true, "TDPES5", DPP_TX_ERR }, + { true, "TDPES6", DPP_TX_ERR }, + { true, "TDPES7", DPP_TX_ERR }, + { true, "TDPES8", DPP_TX_ERR }, + { true, "TDPES9", DPP_TX_ERR }, + { true, "TDPES10", DPP_TX_ERR }, + { true, "TDPES11", DPP_TX_ERR }, + { true, "TDPES12", DPP_TX_ERR }, + { true, "TDPES13", DPP_TX_ERR }, + { true, "TDPES14", DPP_TX_ERR }, + { true, "TDPES15", DPP_TX_ERR }, + { true, "RDPES0", DPP_RX_ERR }, + { true, "RDPES1", DPP_RX_ERR }, + { true, "RDPES2", DPP_RX_ERR }, + { true, "RDPES3", DPP_RX_ERR }, + { true, "RDPES4", DPP_RX_ERR }, + { true, "RDPES5", DPP_RX_ERR }, + { true, "RDPES6", DPP_RX_ERR }, + { true, "RDPES7", DPP_RX_ERR }, + { true, "RDPES8", DPP_RX_ERR }, + { true, "RDPES9", DPP_RX_ERR }, + { true, "RDPES10", DPP_RX_ERR }, + { true, "RDPES11", DPP_RX_ERR }, + { true, "RDPES12", DPP_RX_ERR }, + { true, "RDPES13", DPP_RX_ERR }, + { true, "RDPES14", DPP_RX_ERR }, + { true, "RDPES15", DPP_RX_ERR }, +}; + static void dwxgmac3_handle_dma_err(struct net_device *ndev, void __iomem *ioaddr, bool correctable, struct stmmac_safety_stats *stats) @@ -800,6 +838,13 @@ static void dwxgmac3_handle_dma_err(struct net_device *ndev, dwxgmac3_log_error(ndev, value, correctable, "DMA", dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats); + + value = readl(ioaddr + XGMAC_DMA_DPP_INT_STATUS); + writel(value, ioaddr + XGMAC_DMA_DPP_INT_STATUS); + + dwxgmac3_log_error(ndev, value, false, "DMA_DPP", + dwxgmac3_dma_dpp_errors, + STAT_OFF(dma_dpp_errors), stats); } static int @@ -838,6 +883,12 @@ dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp, value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */ writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL); + /* 5. Enable Data Path Parity Protection */ + value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL); + /* already enabled by default, explicit enable it again */ + value &= ~XGMAC_DPP_DISABLE; + writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL); + return 0; } @@ -871,7 +922,11 @@ static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev, ret |= !corr; } - err = dma & (XGMAC_DEUIS | XGMAC_DECIS); + /* DMA_DPP_Interrupt_Status is indicated by MCSIS bit in + * DMA_Safety_Interrupt_Status, so we handle DMA Data Path + * Parity Errors here + */ + err = dma & (XGMAC_DEUIS | XGMAC_DECIS | XGMAC_MCSIS); corr = dma & XGMAC_DECIS; if (err) { dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats); @@ -887,6 +942,7 @@ static const struct dwxgmac3_error { { dwxgmac3_mac_errors }, { dwxgmac3_mtl_errors }, { dwxgmac3_dma_errors }, + { dwxgmac3_dma_dpp_errors }, }; static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2aedd7f4132e..b2895dc870a2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3837,6 +3837,9 @@ static int __stmmac_open(struct net_device *dev, priv->rx_copybreak = STMMAC_RX_COPYBREAK; buf_sz = dma_conf->dma_buf_sz; + for (int i = 0; i < MTL_MAX_TX_QUEUES; i++) + if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN) + dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs; memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf)); stmmac_reset_queues_param(priv); @@ -5836,11 +5839,6 @@ static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5856,11 +5854,6 @@ static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5882,11 +5875,6 @@ static irqreturn_t stmmac_msi_intr_tx(int irq, void *data) dma_conf = container_of(tx_q, struct stmmac_dma_conf, tx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5913,11 +5901,6 @@ static irqreturn_t stmmac_msi_intr_rx(int irq, void *data) dma_conf = container_of(rx_q, struct stmmac_dma_conf, rx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -7190,6 +7173,9 @@ int stmmac_dvr_probe(struct device *device, dev_err(priv->device, "unable to bring out of ahb reset: %pe\n", ERR_PTR(ret)); + /* Wait a bit for the reset to take effect */ + udelay(10); + /* Init MAC and get the capabilities */ ret = stmmac_hw_init(priv); if (ret) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 13c9c2d6b79b..d95771ca4e5a 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -631,6 +631,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) } } + phy->mac_managed_pm = true; + slave->phy = phy; phy_attached_info(slave->phy); diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 83596ec0c7cb..6e70aa1cc7bf 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -772,6 +772,9 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) slave->slave_num); return; } + + phy->mac_managed_pm = true; + slave->phy = phy; phy_attached_info(slave->phy); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 6bf5e341c3c1..08c45756b218 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1445,7 +1445,7 @@ static int temac_probe(struct platform_device *pdev) } /* map device registers */ - lp->regs = devm_platform_ioremap_resource_byname(pdev, 0); + lp->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(lp->regs)) { dev_err(&pdev->dev, "could not map TEMAC registers\n"); return -ENOMEM; diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c index 704e949484d0..b9b5554ea862 100644 --- a/drivers/net/fjes/fjes_hw.c +++ b/drivers/net/fjes/fjes_hw.c @@ -221,21 +221,25 @@ static int fjes_hw_setup(struct fjes_hw *hw) mem_size = FJES_DEV_REQ_BUF_SIZE(hw->max_epid); hw->hw_info.req_buf = kzalloc(mem_size, GFP_KERNEL); - if (!(hw->hw_info.req_buf)) - return -ENOMEM; + if (!(hw->hw_info.req_buf)) { + result = -ENOMEM; + goto free_ep_info; + } hw->hw_info.req_buf_size = mem_size; mem_size = FJES_DEV_RES_BUF_SIZE(hw->max_epid); hw->hw_info.res_buf = kzalloc(mem_size, GFP_KERNEL); - if (!(hw->hw_info.res_buf)) - return -ENOMEM; + if (!(hw->hw_info.res_buf)) { + result = -ENOMEM; + goto free_req_buf; + } hw->hw_info.res_buf_size = mem_size; result = fjes_hw_alloc_shared_status_region(hw); if (result) - return result; + goto free_res_buf; hw->hw_info.buffer_share_bit = 0; hw->hw_info.buffer_unshare_reserve_bit = 0; @@ -246,11 +250,11 @@ static int fjes_hw_setup(struct fjes_hw *hw) result = fjes_hw_alloc_epbuf(&buf_pair->tx); if (result) - return result; + goto free_epbuf; result = fjes_hw_alloc_epbuf(&buf_pair->rx); if (result) - return result; + goto free_epbuf; spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf(&buf_pair->tx, mac, @@ -273,6 +277,25 @@ static int fjes_hw_setup(struct fjes_hw *hw) fjes_hw_init_command_registers(hw, ¶m); return 0; + +free_epbuf: + for (epidx = 0; epidx < hw->max_epid ; epidx++) { + if (epidx == hw->my_epid) + continue; + fjes_hw_free_epbuf(&hw->ep_shm_info[epidx].tx); + fjes_hw_free_epbuf(&hw->ep_shm_info[epidx].rx); + } + fjes_hw_free_shared_status_region(hw); +free_res_buf: + kfree(hw->hw_info.res_buf); + hw->hw_info.res_buf = NULL; +free_req_buf: + kfree(hw->hw_info.req_buf); + hw->hw_info.req_buf = NULL; +free_ep_info: + kfree(hw->ep_shm_info); + hw->ep_shm_info = NULL; + return result; } static void fjes_hw_cleanup(struct fjes_hw *hw) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index f393e454f45c..3f8da6f0b25c 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -221,7 +221,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, struct genevehdr *gnvh = geneve_hdr(skb); struct metadata_dst *tun_dst = NULL; unsigned int len; - int err = 0; + int nh, err = 0; void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { @@ -272,9 +272,23 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, skb->pkt_type = PACKET_HOST; } - oiph = skb_network_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(geneve->dev, rx_length_errors); + DEV_STATS_INC(geneve->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (geneve_get_sk_family(gs) == AF_INET) err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index bace989591f7..7086acfed5b9 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1902,26 +1902,26 @@ static int __init gtp_init(void) get_random_bytes(>p_h_initval, sizeof(gtp_h_initval)); - err = rtnl_link_register(>p_link_ops); + err = register_pernet_subsys(>p_net_ops); if (err < 0) goto error_out; + err = rtnl_link_register(>p_link_ops); + if (err < 0) + goto unreg_pernet_subsys; + err = genl_register_family(>p_genl_family); if (err < 0) goto unreg_rtnl_link; - err = register_pernet_subsys(>p_net_ops); - if (err < 0) - goto unreg_genl_family; - pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; -unreg_genl_family: - genl_unregister_family(>p_genl_family); unreg_rtnl_link: rtnl_link_unregister(>p_link_ops); +unreg_pernet_subsys: + unregister_pernet_subsys(>p_net_ops); error_out: pr_err("error loading GTP module loaded\n"); return err; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index da737d959e81..3a834d4e1c84 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -740,7 +740,10 @@ void netvsc_device_remove(struct hv_device *device) /* Disable NAPI and disassociate its context from the device. */ for (i = 0; i < net_device->num_chn; i++) { /* See also vmbus_reset_channel_cb(). */ - napi_disable(&net_device->chan_table[i].napi); + /* only disable enabled NAPI channel */ + if (i < ndev->real_num_rx_queues) + napi_disable(&net_device->chan_table[i].napi); + netif_napi_del(&net_device->chan_table[i].napi); } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 0285894c892a..1b7405539984 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -42,9 +42,13 @@ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) +/* Macros to define the context of vf registration */ +#define VF_REG_IN_PROBE 1 +#define VF_REG_IN_NOTIFIER 2 + static unsigned int ring_size __ro_after_init = 128; module_param(ring_size, uint, 0444); -MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); +MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)"); unsigned int netvsc_ring_bytes __ro_after_init; static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | @@ -2181,7 +2185,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb) } static int netvsc_vf_join(struct net_device *vf_netdev, - struct net_device *ndev) + struct net_device *ndev, int context) { struct net_device_context *ndev_ctx = netdev_priv(ndev); int ret; @@ -2204,7 +2208,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev, goto upper_link_failed; } - schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); + /* If this registration is called from probe context vf_takeover + * is taken care of later in probe itself. + */ + if (context == VF_REG_IN_NOTIFIER) + schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); @@ -2342,7 +2350,7 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev) return NOTIFY_DONE; } -static int netvsc_register_vf(struct net_device *vf_netdev) +static int netvsc_register_vf(struct net_device *vf_netdev, int context) { struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; @@ -2382,7 +2390,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); - if (netvsc_vf_join(vf_netdev, ndev) != 0) + if (netvsc_vf_join(vf_netdev, ndev, context) != 0) return NOTIFY_DONE; dev_hold(vf_netdev); @@ -2480,10 +2488,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) return NOTIFY_OK; } +static int check_dev_is_matching_vf(struct net_device *event_ndev) +{ + /* Skip NetVSC interfaces */ + if (event_ndev->netdev_ops == &device_ops) + return -ENODEV; + + /* Avoid non-Ethernet type devices */ + if (event_ndev->type != ARPHRD_ETHER) + return -ENODEV; + + /* Avoid Vlan dev with same MAC registering as VF */ + if (is_vlan_dev(event_ndev)) + return -ENODEV; + + /* Avoid Bonding master dev with same MAC registering as VF */ + if (netif_is_bond_master(event_ndev)) + return -ENODEV; + + return 0; +} + static int netvsc_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { - struct net_device *net = NULL; + struct net_device *net = NULL, *vf_netdev; struct net_device_context *net_device_ctx; struct netvsc_device_info *device_info = NULL; struct netvsc_device *nvdev; @@ -2592,6 +2621,30 @@ static int netvsc_probe(struct hv_device *dev, } list_add(&net_device_ctx->list, &netvsc_dev_list); + + /* When the hv_netvsc driver is unloaded and reloaded, the + * NET_DEVICE_REGISTER for the vf device is replayed before probe + * is complete. This is because register_netdevice_notifier() gets + * registered before vmbus_driver_register() so that callback func + * is set before probe and we don't miss events like NETDEV_POST_INIT + * So, in this section we try to register the matching vf device that + * is present as a netdevice, knowing that its register call is not + * processed in the netvsc_netdev_notifier(as probing is progress and + * get_netvsc_byslot fails). + */ + for_each_netdev(dev_net(net), vf_netdev) { + ret = check_dev_is_matching_vf(vf_netdev); + if (ret != 0) + continue; + + if (net != get_netvsc_byslot(vf_netdev)) + continue; + + netvsc_prepare_bonding(vf_netdev); + netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE); + __netvsc_vf_setup(net, vf_netdev); + break; + } rtnl_unlock(); netvsc_devinfo_put(device_info); @@ -2748,28 +2801,17 @@ static int netvsc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + int ret = 0; - /* Skip our own events */ - if (event_dev->netdev_ops == &device_ops) - return NOTIFY_DONE; - - /* Avoid non-Ethernet type devices */ - if (event_dev->type != ARPHRD_ETHER) - return NOTIFY_DONE; - - /* Avoid Vlan dev with same MAC registering as VF */ - if (is_vlan_dev(event_dev)) - return NOTIFY_DONE; - - /* Avoid Bonding master dev with same MAC registering as VF */ - if (netif_is_bond_master(event_dev)) + ret = check_dev_is_matching_vf(event_dev); + if (ret != 0) return NOTIFY_DONE; switch (event) { case NETDEV_POST_INIT: return netvsc_prepare_bonding(event_dev); case NETDEV_REGISTER: - return netvsc_register_vf(event_dev); + return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER); case NETDEV_UNREGISTER: return netvsc_unregister_vf(event_dev); case NETDEV_UP: @@ -2801,7 +2843,7 @@ static int __init netvsc_drv_init(void) pr_info("Increased ring_size to %u (min allowed)\n", ring_size); } - netvsc_ring_bytes = ring_size * PAGE_SIZE; + netvsc_ring_bytes = VMBUS_RING_SIZE(ring_size * 4096); register_netdevice_notifier(&netvsc_netdev_notifier); diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index c3fbdd6b68ba..f3fa4bd12116 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -835,14 +835,14 @@ static void nsim_dev_trap_report_work(struct work_struct *work) trap_report_dw.work); nsim_dev = nsim_trap_data->nsim_dev; - /* For each running port and enabled packet trap, generate a UDP - * packet with a random 5-tuple and report it. - */ if (!devl_trylock(priv_to_devlink(nsim_dev))) { - schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 0); + schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 1); return; } + /* For each running port and enabled packet trap, generate a UDP + * packet with a random 5-tuple and report it. + */ list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) { if (!netif_running(nsim_dev_port->ns->netdev)) continue; diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index edd4b1e58d96..75868e63b81b 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -2051,7 +2051,7 @@ static struct phy_driver at803x_driver[] = { .write_page = at803x_write_page, .get_features = at803x_get_features, .read_status = at803x_read_status, - .config_intr = &at803x_config_intr, + .config_intr = at803x_config_intr, .handle_interrupt = at803x_handle_interrupt, .get_tunable = at803x_get_tunable, .set_tunable = at803x_set_tunable, @@ -2081,7 +2081,7 @@ static struct phy_driver at803x_driver[] = { .resume = at803x_resume, .flags = PHY_POLL_CABLE_TEST, /* PHY_BASIC_FEATURES */ - .config_intr = &at803x_config_intr, + .config_intr = at803x_config_intr, .handle_interrupt = at803x_handle_interrupt, .cable_test_start = at803x_cable_test_start, .cable_test_get_status = at803x_cable_test_get_status, @@ -2097,7 +2097,7 @@ static struct phy_driver at803x_driver[] = { .resume = at803x_resume, .flags = PHY_POLL_CABLE_TEST, /* PHY_BASIC_FEATURES */ - .config_intr = &at803x_config_intr, + .config_intr = at803x_config_intr, .handle_interrupt = at803x_handle_interrupt, .cable_test_start = at803x_cable_test_start, .cable_test_get_status = at803x_cable_test_get_status, diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 267e6fd3d444..57411ee1d837 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -380,7 +380,7 @@ static int dp83822_config_init(struct phy_device *phydev) { struct dp83822_private *dp83822 = phydev->priv; struct device *dev = &phydev->mdio.dev; - int rgmii_delay; + int rgmii_delay = 0; s32 rx_int_delay; s32 tx_int_delay; int err = 0; @@ -390,30 +390,33 @@ static int dp83822_config_init(struct phy_device *phydev) rx_int_delay = phy_get_internal_delay(phydev, dev, NULL, 0, true); - if (rx_int_delay <= 0) - rgmii_delay = 0; - else - rgmii_delay = DP83822_RX_CLK_SHIFT; + /* Set DP83822_RX_CLK_SHIFT to enable rx clk internal delay */ + if (rx_int_delay > 0) + rgmii_delay |= DP83822_RX_CLK_SHIFT; tx_int_delay = phy_get_internal_delay(phydev, dev, NULL, 0, false); + + /* Set DP83822_TX_CLK_SHIFT to disable tx clk internal delay */ if (tx_int_delay <= 0) - rgmii_delay &= ~DP83822_TX_CLK_SHIFT; - else rgmii_delay |= DP83822_TX_CLK_SHIFT; - if (rgmii_delay) { - err = phy_set_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_RCSR, rgmii_delay); - if (err) - return err; - } + err = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR, + DP83822_RX_CLK_SHIFT | DP83822_TX_CLK_SHIFT, rgmii_delay); + if (err) + return err; - phy_set_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + err = phy_set_bits_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + + if (err) + return err; } else { - phy_clear_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + err = phy_clear_bits_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + + if (err) + return err; } if (dp83822->fx_enabled) { diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 7cbcf51bae92..9481f172830f 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -120,6 +120,11 @@ */ #define LAN8814_1PPM_FORMAT 17179 +#define PTP_RX_VERSION 0x0248 +#define PTP_TX_VERSION 0x0288 +#define PTP_MAX_VERSION(x) (((x) & GENMASK(7, 0)) << 8) +#define PTP_MIN_VERSION(x) ((x) & GENMASK(7, 0)) + #define PTP_RX_MOD 0x024F #define PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_ BIT(3) #define PTP_RX_TIMESTAMP_EN 0x024D @@ -2922,6 +2927,12 @@ static void lan8814_ptp_init(struct phy_device *phydev) lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_IP_ADDR_EN, 0); lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_IP_ADDR_EN, 0); + /* Disable checking for minorVersionPTP field */ + lanphy_write_page_reg(phydev, 5, PTP_RX_VERSION, + PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0)); + lanphy_write_page_reg(phydev, 5, PTP_TX_VERSION, + PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0)); + skb_queue_head_init(&ptp_priv->tx_queue); skb_queue_head_init(&ptp_priv->rx_queue); INIT_LIST_HEAD(&ptp_priv->rx_ts_list); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 17e353cf1232..389f4e5256fa 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2698,8 +2698,8 @@ EXPORT_SYMBOL(genphy_resume); int genphy_loopback(struct phy_device *phydev, bool enable) { if (enable) { - u16 val, ctl = BMCR_LOOPBACK; - int ret; + u16 ctl = BMCR_LOOPBACK; + int ret, val; ctl |= mii_bmcr_encode_fixed(phydev->speed, phydev->duplex); @@ -2951,7 +2951,7 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, if (delay < 0) return delay; - if (delay && size == 0) + if (size == 0) return delay; if (delay < delay_values[0] || delay > delay_values[size - 1]) { diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 7f3a9bd7612c..6582d331e72d 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -414,9 +414,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) ERR_PTR(ret)); return ret; } + + return genphy_soft_reset(phydev); } - return genphy_soft_reset(phydev); + return 0; } static int rtl821x_resume(struct phy_device *phydev) diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index 15a179631903..abc65c4d7a30 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -469,6 +469,10 @@ ppp_async_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg) case PPPIOCSMRU: if (get_user(val, p)) break; + if (val > U16_MAX) { + err = -EINVAL; + break; + } if (val < PPP_MRU) val = PPP_MRU; ap->mru = val; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index d373953ddc30..922d6f16d99d 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -653,6 +653,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->tfiles[tun->numqueues - 1]); ntfile = rtnl_dereference(tun->tfiles[index]); ntfile->queue_index = index; + ntfile->xdp_rxq.queue_index = index; rcu_assign_pointer(tun->tfiles[tun->numqueues - 1], NULL); @@ -1622,13 +1623,19 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog, switch (act) { case XDP_REDIRECT: err = xdp_do_redirect(tun->dev, xdp, xdp_prog); - if (err) + if (err) { + dev_core_stats_rx_dropped_inc(tun->dev); return err; + } + dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data); break; case XDP_TX: err = tun_xdp_tx(tun->dev, xdp); - if (err < 0) + if (err < 0) { + dev_core_stats_rx_dropped_inc(tun->dev); return err; + } + dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data); break; case XDP_PASS: break; diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 5a1bf42ce156..d837c1887416 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1315,8 +1315,6 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) netif_set_tso_max_size(dev->net, 16384); - ax88179_reset(dev); - return 0; } diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 99ec1d4a972d..8b6d6a1b3c2e 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc) err = dm_read_shared_word(dev, 1, loc, &res); if (err < 0) { netdev_err(dev->net, "MDIO read error: %d\n", err); - return err; + return 0; } netdev_dbg(dev->net, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index c458c030fadf..366e83ed0a97 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1501,7 +1501,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) lan78xx_rx_urb_submit_all(dev); + local_bh_disable(); napi_schedule(&dev->napi); + local_bh_enable(); } return 0; @@ -3035,7 +3037,8 @@ static int lan78xx_reset(struct lan78xx_net *dev) if (dev->chipid == ID_REV_CHIP_ID_7801_) buf &= ~MAC_CR_GMII_EN_; - if (dev->chipid == ID_REV_CHIP_ID_7800_) { + if (dev->chipid == ID_REV_CHIP_ID_7800_ || + dev->chipid == ID_REV_CHIP_ID_7850_) { ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig); if (!ret && sig != EEPROM_INDICATOR) { /* Implies there is no external eeprom. Set mac speed */ @@ -3134,7 +3137,8 @@ static int lan78xx_open(struct net_device *net) done: mutex_unlock(&dev->dev_mutex); - usb_autopm_put_interface(dev->intf); + if (ret < 0) + usb_autopm_put_interface(dev->intf); return ret; } diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index a530f20ee257..2fa46baa589e 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -2104,6 +2104,11 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0424, 0x9E08), .driver_info = (unsigned long) &smsc95xx_info, }, + { + /* SYSTEC USB-SPEmodule1 10BASE-T1L Ethernet Device */ + USB_DEVICE(0x0878, 0x1400), + .driver_info = (unsigned long)&smsc95xx_info, + }, { /* Microchip's EVB-LAN8670-USB 10BASE-T1S Ethernet Device */ USB_DEVICE(0x184F, 0x0051), diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index f5e19f3ef6cd..4de514482183 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -737,7 +737,9 @@ static int sr9800_bind(struct usbnet *dev, struct usb_interface *intf) data->eeprom_len = SR9800_EEPROM_LEN; - usbnet_get_endpoints(dev, intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + goto out; /* LED Setting Rule : * AABB:CCDD diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 36c5a41f84e4..8dcd3b6e143b 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1135,14 +1135,6 @@ static int veth_enable_xdp(struct net_device *dev) veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); return err; } - - if (!veth_gro_requested(dev)) { - /* user-space did not require GRO, but adding XDP - * is supposed to get GRO working - */ - dev->features |= NETIF_F_GRO; - netdev_features_change(dev); - } } } @@ -1162,18 +1154,9 @@ static void veth_disable_xdp(struct net_device *dev) for (i = 0; i < dev->real_num_rx_queues; i++) rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); - if (!netif_running(dev) || !veth_gro_requested(dev)) { + if (!netif_running(dev) || !veth_gro_requested(dev)) veth_napi_del(dev); - /* if user-space did not require GRO, since adding XDP - * enabled it, clear it now - */ - if (!veth_gro_requested(dev) && netif_running(dev)) { - dev->features &= ~NETIF_F_GRO; - netdev_features_change(dev); - } - } - veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); } @@ -1376,7 +1359,8 @@ static int veth_alloc_queues(struct net_device *dev) struct veth_priv *priv = netdev_priv(dev); int i; - priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); + priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq), + GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!priv->rq) return -ENOMEM; @@ -1392,7 +1376,7 @@ static void veth_free_queues(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); - kfree(priv->rq); + kvfree(priv->rq); } static int veth_dev_init(struct net_device *dev) @@ -1460,8 +1444,6 @@ static netdev_features_t veth_fix_features(struct net_device *dev, if (peer_priv->_xdp_prog) features &= ~NETIF_F_GSO_SOFTWARE; } - if (priv->_xdp_prog) - features |= NETIF_F_GRO; return features; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 21d3461fb5d1..45f1a871b7da 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3474,10 +3474,11 @@ static int virtnet_find_vqs(struct virtnet_info *vi) { vq_callback_t **callbacks; struct virtqueue **vqs; - int ret = -ENOMEM; - int i, total_vqs; const char **names; + int ret = -ENOMEM; + int total_vqs; bool *ctx; + u16 i; /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by @@ -3514,8 +3515,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi) for (i = 0; i < vi->max_queue_pairs; i++) { callbacks[rxq2vq(i)] = skb_recv_done; callbacks[txq2vq(i)] = skb_xmit_done; - sprintf(vi->rq[i].name, "input.%d", i); - sprintf(vi->sq[i].name, "output.%d", i); + sprintf(vi->rq[i].name, "input.%u", i); + sprintf(vi->sq[i].name, "output.%u", i); names[rxq2vq(i)] = vi->rq[i].name; names[txq2vq(i)] = vi->sq[i].name; if (ctx) diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c index 6d1bd9f52d02..81b716e6612e 100644 --- a/drivers/net/wireguard/netlink.c +++ b/drivers/net/wireguard/netlink.c @@ -164,8 +164,8 @@ get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx) if (!allowedips_node) goto no_allowedips; if (!ctx->allowedips_seq) - ctx->allowedips_seq = peer->device->peer_allowedips.seq; - else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq) + ctx->allowedips_seq = ctx->wg->peer_allowedips.seq; + else if (ctx->allowedips_seq != ctx->wg->peer_allowedips.seq) goto no_allowedips; allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS); @@ -255,17 +255,17 @@ static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!peers_nest) goto out; ret = 0; - /* If the last cursor was removed via list_del_init in peer_remove, then + lockdep_assert_held(&wg->device_update_lock); + /* If the last cursor was removed in peer_remove or peer_remove_all, then * we just treat this the same as there being no more peers left. The * reason is that seq_nr should indicate to userspace that this isn't a * coherent dump anyway, so they'll try again. */ if (list_empty(&wg->peer_list) || - (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) { + (ctx->next_peer && ctx->next_peer->is_dead)) { nla_nest_cancel(skb, peers_nest); goto out; } - lockdep_assert_held(&wg->device_update_lock); peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list); list_for_each_entry_continue(peer, &wg->peer_list, peer_list) { if (get_peer(peer, skb, ctx)) { diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index a176653c8861..db01ec03bda0 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -251,7 +251,7 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) if (unlikely(!READ_ONCE(keypair->receiving.is_valid) || wg_birthdate_has_expired(keypair->receiving.birthdate, REJECT_AFTER_TIME) || - keypair->receiving_counter.counter >= REJECT_AFTER_MESSAGES)) { + READ_ONCE(keypair->receiving_counter.counter) >= REJECT_AFTER_MESSAGES)) { WRITE_ONCE(keypair->receiving.is_valid, false); return false; } @@ -318,7 +318,7 @@ static bool counter_validate(struct noise_replay_counter *counter, u64 their_cou for (i = 1; i <= top; ++i) counter->backtrack[(i + index_current) & ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; - counter->counter = their_counter; + WRITE_ONCE(counter->counter, their_counter); } index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; @@ -463,7 +463,7 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget) net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", peer->device->dev->name, PACKET_CB(skb)->nonce, - keypair->receiving_counter.counter); + READ_ONCE(keypair->receiving_counter.counter)); goto next; } diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index 876410a47d1d..4d5009604eee 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -844,6 +844,10 @@ ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev(struct ath10k *ar, struct sk_buff *skb, } ev = tb[WMI_TLV_TAG_STRUCT_MGMT_TX_COMPL_EVENT]; + if (!ev) { + kfree(tb); + return -EPROTO; + } arg->desc_id = ev->desc_id; arg->status = ev->status; diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 21c6b36dc6eb..51fc77e93de5 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -2112,6 +2112,8 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar, mcs_160_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_160); mcs_80_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_80); + /* Initialize rx_mcs_160 to 9 which is an invalid value */ + rx_mcs_160 = 9; if (support_160) { for (i = 7; i >= 0; i--) { u8 mcs_160 = (mcs_160_map >> (2 * i)) & 3; @@ -2123,6 +2125,8 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar, } } + /* Initialize rx_mcs_80 to 9 which is an invalid value */ + rx_mcs_80 = 9; for (i = 7; i >= 0; i--) { u8 mcs_80 = (mcs_80_map >> (2 * i)) & 3; diff --git a/drivers/net/wireless/ath/ath11k/pcic.c b/drivers/net/wireless/ath/ath11k/pcic.c index 380f9d37b644..e3b65efcc868 100644 --- a/drivers/net/wireless/ath/ath11k/pcic.c +++ b/drivers/net/wireless/ath/ath11k/pcic.c @@ -453,8 +453,6 @@ void ath11k_pcic_ext_irq_enable(struct ath11k_base *ab) { int i; - set_bit(ATH11K_FLAG_EXT_IRQ_ENABLED, &ab->dev_flags); - for (i = 0; i < ATH11K_EXT_IRQ_GRP_NUM_MAX; i++) { struct ath11k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i]; @@ -465,6 +463,8 @@ void ath11k_pcic_ext_irq_enable(struct ath11k_base *ab) } ath11k_pcic_ext_grp_enable(irq_grp); } + + set_bit(ATH11K_FLAG_EXT_IRQ_ENABLED, &ab->dev_flags); } EXPORT_SYMBOL(ath11k_pcic_ext_irq_enable); diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h index 237f4ec2cffd..6c33e898b300 100644 --- a/drivers/net/wireless/ath/ath9k/htc.h +++ b/drivers/net/wireless/ath/ath9k/htc.h @@ -306,7 +306,6 @@ struct ath9k_htc_tx { DECLARE_BITMAP(tx_slot, MAX_TX_BUF_NUM); struct timer_list cleanup_timer; spinlock_t tx_lock; - bool initialized; }; struct ath9k_htc_tx_ctl { @@ -515,6 +514,7 @@ struct ath9k_htc_priv { unsigned long ps_usecount; bool ps_enabled; bool ps_idle; + bool initialized; #ifdef CONFIG_MAC80211_LEDS enum led_brightness brightness; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index 96a3185a96d7..b014185373f3 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -966,6 +966,10 @@ int ath9k_htc_probe_device(struct htc_target *htc_handle, struct device *dev, htc_handle->drv_priv = priv; + /* Allow ath9k_wmi_event_tasklet() to operate. */ + smp_wmb(); + priv->initialized = true; + return 0; err_init: diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index 672789e3c55d..2fdd27885f54 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -652,9 +652,10 @@ void ath9k_htc_txstatus(struct ath9k_htc_priv *priv, void *wmi_event) struct ath9k_htc_tx_event *tx_pend; int i; - for (i = 0; i < txs->cnt; i++) { - WARN_ON(txs->cnt > HTC_MAX_TX_STATUS); + if (WARN_ON_ONCE(txs->cnt > HTC_MAX_TX_STATUS)) + return; + for (i = 0; i < txs->cnt; i++) { __txs = &txs->txstatus[i]; skb = ath9k_htc_tx_get_packet(priv, __txs); @@ -814,10 +815,6 @@ int ath9k_tx_init(struct ath9k_htc_priv *priv) skb_queue_head_init(&priv->tx.data_vo_queue); skb_queue_head_init(&priv->tx.tx_failed); - /* Allow ath9k_wmi_event_tasklet(WMI_TXSTATUS_EVENTID) to operate. */ - smp_wmb(); - priv->tx.initialized = true; - return 0; } diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c index 1476b42b52a9..805ad31edba2 100644 --- a/drivers/net/wireless/ath/ath9k/wmi.c +++ b/drivers/net/wireless/ath/ath9k/wmi.c @@ -155,6 +155,12 @@ void ath9k_wmi_event_tasklet(struct tasklet_struct *t) } spin_unlock_irqrestore(&wmi->wmi_lock, flags); + /* Check if ath9k_htc_probe_device() completed. */ + if (!data_race(priv->initialized)) { + kfree_skb(skb); + continue; + } + hdr = (struct wmi_cmd_hdr *) skb->data; cmd_id = be16_to_cpu(hdr->command_id); wmi_event = skb_pull(skb, sizeof(struct wmi_cmd_hdr)); @@ -169,10 +175,6 @@ void ath9k_wmi_event_tasklet(struct tasklet_struct *t) &wmi->drv_priv->fatal_work); break; case WMI_TXSTATUS_EVENTID: - /* Check if ath9k_tx_init() completed. */ - if (!data_race(priv->tx.initialized)) - break; - spin_lock_bh(&priv->tx.tx_lock); if (priv->tx.flags & ATH9K_HTC_OP_TX_DRAIN) { spin_unlock_bh(&priv->tx.tx_lock); diff --git a/drivers/net/wireless/broadcom/b43/b43.h b/drivers/net/wireless/broadcom/b43/b43.h index 67b4bac048e5..c0d8fc0b22fb 100644 --- a/drivers/net/wireless/broadcom/b43/b43.h +++ b/drivers/net/wireless/broadcom/b43/b43.h @@ -1082,6 +1082,22 @@ static inline bool b43_using_pio_transfers(struct b43_wldev *dev) return dev->__using_pio_transfers; } +static inline void b43_wake_queue(struct b43_wldev *dev, int queue_prio) +{ + if (dev->qos_enabled) + ieee80211_wake_queue(dev->wl->hw, queue_prio); + else + ieee80211_wake_queue(dev->wl->hw, 0); +} + +static inline void b43_stop_queue(struct b43_wldev *dev, int queue_prio) +{ + if (dev->qos_enabled) + ieee80211_stop_queue(dev->wl->hw, queue_prio); + else + ieee80211_stop_queue(dev->wl->hw, 0); +} + /* Message printing */ __printf(2, 3) void b43info(struct b43_wl *wl, const char *fmt, ...); __printf(2, 3) void b43err(struct b43_wl *wl, const char *fmt, ...); diff --git a/drivers/net/wireless/broadcom/b43/dma.c b/drivers/net/wireless/broadcom/b43/dma.c index 9a7c62bd5e43..cfaf2f9d67b2 100644 --- a/drivers/net/wireless/broadcom/b43/dma.c +++ b/drivers/net/wireless/broadcom/b43/dma.c @@ -1399,7 +1399,7 @@ int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb) should_inject_overflow(ring)) { /* This TX ring is full. */ unsigned int skb_mapping = skb_get_queue_mapping(skb); - ieee80211_stop_queue(dev->wl->hw, skb_mapping); + b43_stop_queue(dev, skb_mapping); dev->wl->tx_queue_stopped[skb_mapping] = true; ring->stopped = true; if (b43_debug(dev, B43_DBG_DMAVERBOSE)) { @@ -1570,7 +1570,7 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev, } else { /* If the driver queue is running wake the corresponding * mac80211 queue. */ - ieee80211_wake_queue(dev->wl->hw, ring->queue_prio); + b43_wake_queue(dev, ring->queue_prio); if (b43_debug(dev, B43_DBG_DMAVERBOSE)) { b43dbg(dev->wl, "Woke up TX ring %d\n", ring->index); } diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index b2539a916fd0..bdfa68cc7ee2 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -2587,7 +2587,8 @@ static void b43_request_firmware(struct work_struct *work) start_ieee80211: wl->hw->queues = B43_QOS_QUEUE_NUM; - if (!modparam_qos || dev->fw.opensource) + if (!modparam_qos || dev->fw.opensource || + dev->dev->chip_id == BCMA_CHIP_ID_BCM4331) wl->hw->queues = 1; err = ieee80211_register_hw(wl->hw); @@ -3603,7 +3604,7 @@ static void b43_tx_work(struct work_struct *work) err = b43_dma_tx(dev, skb); if (err == -ENOSPC) { wl->tx_queue_stopped[queue_num] = true; - ieee80211_stop_queue(wl->hw, queue_num); + b43_stop_queue(dev, queue_num); skb_queue_head(&wl->tx_queue[queue_num], skb); break; } @@ -3627,6 +3628,7 @@ static void b43_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb) { struct b43_wl *wl = hw_to_b43_wl(hw); + u16 skb_queue_mapping; if (unlikely(skb->len < 2 + 2 + 6)) { /* Too short, this can't be a valid frame. */ @@ -3635,12 +3637,12 @@ static void b43_op_tx(struct ieee80211_hw *hw, } B43_WARN_ON(skb_shinfo(skb)->nr_frags); - skb_queue_tail(&wl->tx_queue[skb->queue_mapping], skb); - if (!wl->tx_queue_stopped[skb->queue_mapping]) { + skb_queue_mapping = skb_get_queue_mapping(skb); + skb_queue_tail(&wl->tx_queue[skb_queue_mapping], skb); + if (!wl->tx_queue_stopped[skb_queue_mapping]) ieee80211_queue_work(wl->hw, &wl->tx_work); - } else { - ieee80211_stop_queue(wl->hw, skb->queue_mapping); - } + else + b43_stop_queue(wl->current_dev, skb_queue_mapping); } static void b43_qos_params_upload(struct b43_wldev *dev, diff --git a/drivers/net/wireless/broadcom/b43/pio.c b/drivers/net/wireless/broadcom/b43/pio.c index 8c28a9250cd1..cc19b589fa70 100644 --- a/drivers/net/wireless/broadcom/b43/pio.c +++ b/drivers/net/wireless/broadcom/b43/pio.c @@ -525,7 +525,7 @@ int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb) if (total_len > (q->buffer_size - q->buffer_used)) { /* Not enough memory on the queue. */ err = -EBUSY; - ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb)); + b43_stop_queue(dev, skb_get_queue_mapping(skb)); q->stopped = true; goto out; } @@ -552,7 +552,7 @@ int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb) if (((q->buffer_size - q->buffer_used) < roundup(2 + 2 + 6, 4)) || (q->free_packet_slots == 0)) { /* The queue is full. */ - ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb)); + b43_stop_queue(dev, skb_get_queue_mapping(skb)); q->stopped = true; } @@ -587,7 +587,7 @@ void b43_pio_handle_txstatus(struct b43_wldev *dev, list_add(&pack->list, &q->packets_list); if (q->stopped) { - ieee80211_wake_queue(dev->wl->hw, q->queue_prio); + b43_wake_queue(dev, q->queue_prio); q->stopped = false; } } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index ad5a8d61d938..24a3d5a593f1 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -791,8 +791,7 @@ s32 brcmf_notify_escan_complete(struct brcmf_cfg80211_info *cfg, scan_request = cfg->scan_request; cfg->scan_request = NULL; - if (timer_pending(&cfg->escan_timeout)) - del_timer_sync(&cfg->escan_timeout); + timer_delete_sync(&cfg->escan_timeout); if (fw_abort) { /* Do a scan abort to stop the driver's scan engine */ @@ -7805,6 +7804,7 @@ void brcmf_cfg80211_detach(struct brcmf_cfg80211_info *cfg) brcmf_btcoex_detach(cfg); wiphy_unregister(cfg->wiphy); wl_deinit_priv(cfg); + cancel_work_sync(&cfg->escan_timeout_work); brcmf_free_wiphy(cfg->wiphy); kfree(cfg); } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c index ccc621b8ed9f..4a1fe982a948 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c @@ -383,8 +383,9 @@ struct shared_phy *wlc_phy_shared_attach(struct shared_phy_params *shp) return sh; } -static void wlc_phy_timercb_phycal(struct brcms_phy *pi) +static void wlc_phy_timercb_phycal(void *ptr) { + struct brcms_phy *pi = ptr; uint delay = 5; if (PHY_PERICAL_MPHASE_PENDING(pi)) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c index a0de5db0cd64..b72381791536 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c @@ -57,12 +57,11 @@ void wlc_phy_shim_detach(struct phy_shim_info *physhim) } struct wlapi_timer *wlapi_init_timer(struct phy_shim_info *physhim, - void (*fn)(struct brcms_phy *pi), + void (*fn)(void *pi), void *arg, const char *name) { return (struct wlapi_timer *) - brcms_init_timer(physhim->wl, (void (*)(void *))fn, - arg, name); + brcms_init_timer(physhim->wl, fn, arg, name); } void wlapi_free_timer(struct wlapi_timer *t) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h index dd8774717ade..27d0934e600e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h @@ -131,7 +131,7 @@ void wlc_phy_shim_detach(struct phy_shim_info *physhim); /* PHY to WL utility functions */ struct wlapi_timer *wlapi_init_timer(struct phy_shim_info *physhim, - void (*fn)(struct brcms_phy *pi), + void (*fn)(void *pi), void *arg, const char *name); void wlapi_free_timer(struct wlapi_timer *t); void wlapi_add_timer(struct wlapi_timer *t, uint ms, int periodic); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 585e8cd2d332..235963e1d7a9 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -576,7 +576,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 2) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -592,7 +592,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 1) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -608,7 +608,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 0) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -665,7 +665,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 2) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -681,7 +681,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 1) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -697,7 +697,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 0) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -725,7 +725,7 @@ read_table: * from index 1, so the maximum value allowed here is * ACPI_SAR_PROFILES_NUM - 1. */ - if (n_profiles <= 0 || n_profiles >= ACPI_SAR_PROFILE_NUM) { + if (n_profiles >= ACPI_SAR_PROFILE_NUM) { ret = -EINVAL; goto out_free; } @@ -1044,6 +1044,9 @@ int iwl_acpi_get_ppag_table(struct iwl_fw_runtime *fwrt) goto read_table; } + ret = PTR_ERR(wifi_pkg); + goto out_free; + read_table: fwrt->ppag_ver = tbl_rev; flags = &wifi_pkg->package.elements[1]; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 3ef0b776b772..3b0ed1cdfa11 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -2903,8 +2903,6 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx) struct iwl_fw_dbg_params params = {0}; struct iwl_fwrt_dump_data *dump_data = &fwrt->dump.wks[wk_idx].dump_data; - u32 policy; - u32 time_point; if (!test_bit(wk_idx, &fwrt->dump.active_wks)) return; @@ -2935,13 +2933,16 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx) iwl_fw_dbg_stop_restart_recording(fwrt, ¶ms, false); - policy = le32_to_cpu(dump_data->trig->apply_policy); - time_point = le32_to_cpu(dump_data->trig->time_point); + if (iwl_trans_dbg_ini_valid(fwrt->trans)) { + u32 policy = le32_to_cpu(dump_data->trig->apply_policy); + u32 time_point = le32_to_cpu(dump_data->trig->time_point); - if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) { - IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n"); - iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0); + if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) { + IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n"); + iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0); + } } + if (fwrt->trans->dbg.last_tp_resetfw == IWL_FW_INI_RESET_FW_MODE_STOP_FW_ONLY) iwl_force_nmi(fwrt->trans); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index a1d34f3e7a9f..677c9e0b46f1 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -103,6 +103,12 @@ static int iwl_dbg_tlv_alloc_debug_info(struct iwl_trans *trans, if (le32_to_cpu(tlv->length) != sizeof(*debug_info)) return -EINVAL; + /* we use this as a string, ensure input was NUL terminated */ + if (strnlen(debug_info->debug_cfg_name, + sizeof(debug_info->debug_cfg_name)) == + sizeof(debug_info->debug_cfg_name)) + return -EINVAL; + IWL_DEBUG_FW(trans, "WRT: Loading debug cfg: %s\n", debug_info->debug_cfg_name); @@ -1082,7 +1088,7 @@ static int iwl_dbg_tlv_override_trig_node(struct iwl_fw_runtime *fwrt, node_trig = (void *)node_tlv->data; } - memcpy(node_trig->data + offset, trig->data, trig_data_len); + memcpy((u8 *)node_trig->data + offset, trig->data, trig_data_len); node_tlv->length = cpu_to_le32(size); if (policy & IWL_FW_INI_APPLY_POLICY_OVERRIDE_CFG) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 2748459d1227..88f4f429d875 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -461,12 +461,10 @@ static int iwl_mvm_wowlan_config_rsc_tsc(struct iwl_mvm *mvm, struct wowlan_key_rsc_v5_data data = {}; int i; - data.rsc = kmalloc(sizeof(*data.rsc), GFP_KERNEL); + data.rsc = kzalloc(sizeof(*data.rsc), GFP_KERNEL); if (!data.rsc) return -ENOMEM; - memset(data.rsc, 0xff, sizeof(*data.rsc)); - for (i = 0; i < ARRAY_SIZE(data.rsc->mcast_key_id_map); i++) data.rsc->mcast_key_id_map[i] = IWL_MCAST_KEY_MAP_INVALID; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index f268a31ce26d..105f283b777d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -299,6 +299,7 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, u32 status, struct ieee80211_rx_status *stats) { + struct wireless_dev *wdev; struct iwl_mvm_sta *mvmsta; struct iwl_mvm_vif *mvmvif; u8 keyid; @@ -320,9 +321,15 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, if (!ieee80211_is_beacon(hdr->frame_control)) return 0; + if (!sta) + return -1; + + mvmsta = iwl_mvm_sta_from_mac80211(sta); + mvmvif = iwl_mvm_vif_from_mac80211(mvmsta->vif); + /* key mismatch - will also report !MIC_OK but we shouldn't count it */ if (!(status & IWL_RX_MPDU_STATUS_KEY_VALID)) - return -1; + goto report; /* good cases */ if (likely(status & IWL_RX_MPDU_STATUS_MIC_OK && @@ -331,13 +338,6 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, return 0; } - if (!sta) - return -1; - - mvmsta = iwl_mvm_sta_from_mac80211(sta); - - mvmvif = iwl_mvm_vif_from_mac80211(mvmsta->vif); - /* * both keys will have the same cipher and MIC length, use * whichever one is available @@ -346,11 +346,11 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, if (!key) { key = rcu_dereference(mvmvif->bcn_prot.keys[1]); if (!key) - return -1; + goto report; } if (len < key->icv_len + IEEE80211_GMAC_PN_LEN + 2) - return -1; + goto report; /* get the real key ID */ keyid = frame[len - key->icv_len - IEEE80211_GMAC_PN_LEN - 2]; @@ -364,7 +364,7 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, return -1; key = rcu_dereference(mvmvif->bcn_prot.keys[keyid - 6]); if (!key) - return -1; + goto report; } /* Report status to mac80211 */ @@ -372,6 +372,10 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, ieee80211_key_mic_failure(key); else if (status & IWL_RX_MPDU_STATUS_REPLAY_ERROR) ieee80211_key_replay(key); +report: + wdev = ieee80211_vif_to_wdev(mvmsta->vif); + if (wdev->netdev) + cfg80211_rx_unprot_mlme_mgmt(wdev->netdev, (void *)hdr, len); return -1; } diff --git a/drivers/net/wireless/marvell/libertas/cmd.c b/drivers/net/wireless/marvell/libertas/cmd.c index 104d2b6dc9af..5a525da434c2 100644 --- a/drivers/net/wireless/marvell/libertas/cmd.c +++ b/drivers/net/wireless/marvell/libertas/cmd.c @@ -1132,7 +1132,7 @@ int lbs_allocate_cmd_buffer(struct lbs_private *priv) if (!cmdarray[i].cmdbuf) { lbs_deb_host("ALLOC_CMD_BUF: ptempvirtualaddr is NULL\n"); ret = -1; - goto done; + goto free_cmd_array; } } @@ -1140,8 +1140,17 @@ int lbs_allocate_cmd_buffer(struct lbs_private *priv) init_waitqueue_head(&cmdarray[i].cmdwait_q); lbs_cleanup_and_insert_cmd(priv, &cmdarray[i]); } - ret = 0; + return 0; +free_cmd_array: + for (i = 0; i < LBS_NUM_CMD_BUFFERS; i++) { + if (cmdarray[i].cmdbuf) { + kfree(cmdarray[i].cmdbuf); + cmdarray[i].cmdbuf = NULL; + } + } + kfree(priv->cmd_array); + priv->cmd_array = NULL; done: return ret; } diff --git a/drivers/net/wireless/marvell/mwifiex/Kconfig b/drivers/net/wireless/marvell/mwifiex/Kconfig index 2b4ff2b78a7e..b182f7155d66 100644 --- a/drivers/net/wireless/marvell/mwifiex/Kconfig +++ b/drivers/net/wireless/marvell/mwifiex/Kconfig @@ -10,13 +10,14 @@ config MWIFIEX mwifiex. config MWIFIEX_SDIO - tristate "Marvell WiFi-Ex Driver for SD8786/SD8787/SD8797/SD8887/SD8897/SD8977/SD8987/SD8997" + tristate "Marvell WiFi-Ex Driver for SD8786/SD8787/SD8797/SD8887/SD8897/SD8977/SD8978/SD8987/SD8997" depends on MWIFIEX && MMC select FW_LOADER select WANT_DEV_COREDUMP help This adds support for wireless adapters based on Marvell - 8786/8787/8797/8887/8897/8977/8987/8997 chipsets with SDIO interface. + 8786/8787/8797/8887/8897/8977/8978/8987/8997 chipsets with + SDIO interface. SD8978 is also known as NXP IW416. If you choose to build it as a module, it will be called mwifiex_sdio. diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c index 63f232c72337..55ca5b287fe7 100644 --- a/drivers/net/wireless/marvell/mwifiex/debugfs.c +++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c @@ -964,9 +964,6 @@ mwifiex_dev_debugfs_init(struct mwifiex_private *priv) priv->dfs_dev_dir = debugfs_create_dir(priv->netdev->name, mwifiex_dfs_dir); - if (!priv->dfs_dev_dir) - return; - MWIFIEX_DFS_ADD_FILE(info); MWIFIEX_DFS_ADD_FILE(debug); MWIFIEX_DFS_ADD_FILE(getlog); diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index ea1c1c2412e7..2c9b70e9a726 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -263,7 +263,7 @@ static const struct mwifiex_sdio_card_reg mwifiex_reg_sd8887 = { 0x68, 0x69, 0x6a}, }; -static const struct mwifiex_sdio_card_reg mwifiex_reg_sd8987 = { +static const struct mwifiex_sdio_card_reg mwifiex_reg_sd89xx = { .start_rd_port = 0, .start_wr_port = 0, .base_0_reg = 0xF8, @@ -331,6 +331,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8786 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = false, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8787 = { @@ -346,6 +347,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8787 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8797 = { @@ -361,6 +363,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8797 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8897 = { @@ -376,6 +379,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8897 = { .can_dump_fw = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8977 = { @@ -392,6 +396,24 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8977 = { .fw_dump_enh = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, +}; + +static const struct mwifiex_sdio_device mwifiex_sdio_sd8978 = { + .firmware_sdiouart = SD8978_SDIOUART_FW_NAME, + .reg = &mwifiex_reg_sd89xx, + .max_ports = 32, + .mp_agg_pkt_limit = 16, + .tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_4K, + .mp_tx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_MAX, + .mp_rx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_MAX, + .supports_sdio_new_mode = true, + .has_control_mask = false, + .can_dump_fw = true, + .fw_dump_enh = true, + .can_auto_tdls = false, + .can_ext_scan = true, + .fw_ready_extra_delay = true, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8997 = { @@ -409,6 +431,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8997 = { .fw_dump_enh = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8887 = { @@ -424,11 +447,12 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8887 = { .can_dump_fw = false, .can_auto_tdls = true, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8987 = { .firmware = SD8987_DEFAULT_FW_NAME, - .reg = &mwifiex_reg_sd8987, + .reg = &mwifiex_reg_sd89xx, .max_ports = 32, .mp_agg_pkt_limit = 16, .tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_2K, @@ -440,6 +464,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8987 = { .fw_dump_enh = true, .can_auto_tdls = true, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8801 = { @@ -455,6 +480,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8801 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static struct memory_type_mapping generic_mem_type_map[] = { @@ -482,7 +508,9 @@ static struct memory_type_mapping mem_type_mapping_tbl[] = { static const struct of_device_id mwifiex_sdio_of_match_table[] __maybe_unused = { { .compatible = "marvell,sd8787" }, { .compatible = "marvell,sd8897" }, + { .compatible = "marvell,sd8978" }, { .compatible = "marvell,sd8997" }, + { .compatible = "nxp,iw416" }, { } }; @@ -545,6 +573,7 @@ mwifiex_sdio_probe(struct sdio_func *func, const struct sdio_device_id *id) card->fw_dump_enh = data->fw_dump_enh; card->can_auto_tdls = data->can_auto_tdls; card->can_ext_scan = data->can_ext_scan; + card->fw_ready_extra_delay = data->fw_ready_extra_delay; INIT_WORK(&card->work, mwifiex_sdio_work); } @@ -748,8 +777,9 @@ mwifiex_sdio_read_fw_status(struct mwifiex_adapter *adapter, u16 *dat) static int mwifiex_check_fw_status(struct mwifiex_adapter *adapter, u32 poll_num) { + struct sdio_mmc_card *card = adapter->card; int ret = 0; - u16 firmware_stat; + u16 firmware_stat = 0; u32 tries; for (tries = 0; tries < poll_num; tries++) { @@ -765,6 +795,13 @@ static int mwifiex_check_fw_status(struct mwifiex_adapter *adapter, ret = -1; } + if (card->fw_ready_extra_delay && + firmware_stat == FIRMWARE_READY_SDIO) + /* firmware might pretend to be ready, when it's not. + * Wait a little bit more as a workaround. + */ + msleep(100); + return ret; } @@ -920,6 +957,8 @@ static const struct sdio_device_id mwifiex_ids[] = { .driver_data = (unsigned long)&mwifiex_sdio_sd8801}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8977_WLAN), .driver_data = (unsigned long)&mwifiex_sdio_sd8977}, + {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8978_WLAN), + .driver_data = (unsigned long)&mwifiex_sdio_sd8978}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8987_WLAN), .driver_data = (unsigned long)&mwifiex_sdio_sd8987}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8997_WLAN), @@ -3164,6 +3203,7 @@ MODULE_FIRMWARE(SD8797_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8897_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8887_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8977_DEFAULT_FW_NAME); +MODULE_FIRMWARE(SD8978_SDIOUART_FW_NAME); MODULE_FIRMWARE(SD8987_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8997_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8997_SDIOUART_FW_NAME); diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.h b/drivers/net/wireless/marvell/mwifiex/sdio.h index 3a24bb48b299..a5112cb35cdc 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.h +++ b/drivers/net/wireless/marvell/mwifiex/sdio.h @@ -25,6 +25,7 @@ #define SD8887_DEFAULT_FW_NAME "mrvl/sd8887_uapsta.bin" #define SD8801_DEFAULT_FW_NAME "mrvl/sd8801_uapsta.bin" #define SD8977_DEFAULT_FW_NAME "mrvl/sdsd8977_combo_v2.bin" +#define SD8978_SDIOUART_FW_NAME "mrvl/sdiouartiw416_combo_v0.bin" #define SD8987_DEFAULT_FW_NAME "mrvl/sd8987_uapsta.bin" #define SD8997_DEFAULT_FW_NAME "mrvl/sdsd8997_combo_v4.bin" #define SD8997_SDIOUART_FW_NAME "mrvl/sdiouart8997_combo_v4.bin" @@ -257,6 +258,7 @@ struct sdio_mmc_card { bool fw_dump_enh; bool can_auto_tdls; bool can_ext_scan; + bool fw_ready_extra_delay; struct mwifiex_sdio_mpa_tx mpa_tx; struct mwifiex_sdio_mpa_rx mpa_rx; @@ -280,6 +282,7 @@ struct mwifiex_sdio_device { bool fw_dump_enh; bool can_auto_tdls; bool can_ext_scan; + bool fw_ready_extra_delay; }; /* diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index b545d93c6e37..6f3245a43aef 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -1615,7 +1615,6 @@ static int del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev) cfg80211_unregister_netdevice(vif->ndev); vif->monitor_flag = 0; - wilc_set_operation_mode(vif, 0, 0, 0); mutex_lock(&wl->vif_mutex); list_del_rcu(&vif->list); wl->vif_num--; @@ -1810,15 +1809,24 @@ int wilc_cfg80211_init(struct wilc **wilc, struct device *dev, int io_type, INIT_LIST_HEAD(&wl->rxq_head.list); INIT_LIST_HEAD(&wl->vif_list); + wl->hif_workqueue = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, + wiphy_name(wl->wiphy)); + if (!wl->hif_workqueue) { + ret = -ENOMEM; + goto free_cfg; + } vif = wilc_netdev_ifc_init(wl, "wlan%d", WILC_STATION_MODE, NL80211_IFTYPE_STATION, false); if (IS_ERR(vif)) { ret = PTR_ERR(vif); - goto free_cfg; + goto free_hq; } return 0; +free_hq: + destroy_workqueue(wl->hif_workqueue); + free_cfg: wilc_wlan_cfg_deinit(wl); diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index a1b75feec6ed..5eb02902e875 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -374,38 +374,49 @@ out: void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, struct cfg80211_crypto_settings *crypto) { - struct wilc_join_bss_param *param; - struct ieee80211_p2p_noa_attr noa_attr; - u8 rates_len = 0; - const u8 *tim_elm, *ssid_elm, *rates_ie, *supp_rates_ie; + const u8 *ies_data, *tim_elm, *ssid_elm, *rates_ie, *supp_rates_ie; const u8 *ht_ie, *wpa_ie, *wmm_ie, *rsn_ie; + struct ieee80211_p2p_noa_attr noa_attr; + const struct cfg80211_bss_ies *ies; + struct wilc_join_bss_param *param; + u8 rates_len = 0, ies_len; int ret; - const struct cfg80211_bss_ies *ies = rcu_dereference(bss->ies); param = kzalloc(sizeof(*param), GFP_KERNEL); if (!param) return NULL; + rcu_read_lock(); + ies = rcu_dereference(bss->ies); + ies_data = kmemdup(ies->data, ies->len, GFP_ATOMIC); + if (!ies_data) { + rcu_read_unlock(); + kfree(param); + return NULL; + } + ies_len = ies->len; + rcu_read_unlock(); + param->beacon_period = cpu_to_le16(bss->beacon_interval); param->cap_info = cpu_to_le16(bss->capability); param->bss_type = WILC_FW_BSS_TYPE_INFRA; param->ch = ieee80211_frequency_to_channel(bss->channel->center_freq); ether_addr_copy(param->bssid, bss->bssid); - ssid_elm = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len); + ssid_elm = cfg80211_find_ie(WLAN_EID_SSID, ies_data, ies_len); if (ssid_elm) { if (ssid_elm[1] <= IEEE80211_MAX_SSID_LEN) memcpy(param->ssid, ssid_elm + 2, ssid_elm[1]); } - tim_elm = cfg80211_find_ie(WLAN_EID_TIM, ies->data, ies->len); + tim_elm = cfg80211_find_ie(WLAN_EID_TIM, ies_data, ies_len); if (tim_elm && tim_elm[1] >= 2) param->dtim_period = tim_elm[3]; memset(param->p_suites, 0xFF, 3); memset(param->akm_suites, 0xFF, 3); - rates_ie = cfg80211_find_ie(WLAN_EID_SUPP_RATES, ies->data, ies->len); + rates_ie = cfg80211_find_ie(WLAN_EID_SUPP_RATES, ies_data, ies_len); if (rates_ie) { rates_len = rates_ie[1]; if (rates_len > WILC_MAX_RATES_SUPPORTED) @@ -416,7 +427,7 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, if (rates_len < WILC_MAX_RATES_SUPPORTED) { supp_rates_ie = cfg80211_find_ie(WLAN_EID_EXT_SUPP_RATES, - ies->data, ies->len); + ies_data, ies_len); if (supp_rates_ie) { u8 ext_rates = supp_rates_ie[1]; @@ -431,11 +442,11 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, } } - ht_ie = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY, ies->data, ies->len); + ht_ie = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY, ies_data, ies_len); if (ht_ie) param->ht_capable = true; - ret = cfg80211_get_p2p_attr(ies->data, ies->len, + ret = cfg80211_get_p2p_attr(ies_data, ies_len, IEEE80211_P2P_ATTR_ABSENCE_NOTICE, (u8 *)&noa_attr, sizeof(noa_attr)); if (ret > 0) { @@ -459,7 +470,7 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, } wmm_ie = cfg80211_find_vendor_ie(WLAN_OUI_MICROSOFT, WLAN_OUI_TYPE_MICROSOFT_WMM, - ies->data, ies->len); + ies_data, ies_len); if (wmm_ie) { struct ieee80211_wmm_param_ie *ie; @@ -474,13 +485,13 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, wpa_ie = cfg80211_find_vendor_ie(WLAN_OUI_MICROSOFT, WLAN_OUI_TYPE_MICROSOFT_WPA, - ies->data, ies->len); + ies_data, ies_len); if (wpa_ie) { param->mode_802_11i = 1; param->rsn_found = true; } - rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len); + rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies_data, ies_len); if (rsn_ie) { int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; int offset = 8; @@ -514,6 +525,7 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, param->akm_suites[i] = crypto->akm_suites[i] & 0xFF; } + kfree(ies_data); return (void *)param; } diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c index e9f59de31b0b..b714da48eaa1 100644 --- a/drivers/net/wireless/microchip/wilc1000/netdev.c +++ b/drivers/net/wireless/microchip/wilc1000/netdev.c @@ -878,8 +878,7 @@ static const struct net_device_ops wilc_netdev_ops = { void wilc_netdev_cleanup(struct wilc *wilc) { - struct wilc_vif *vif; - int srcu_idx, ifc_cnt = 0; + struct wilc_vif *vif, *vif_tmp; if (!wilc) return; @@ -889,32 +888,19 @@ void wilc_netdev_cleanup(struct wilc *wilc) wilc->firmware = NULL; } - srcu_idx = srcu_read_lock(&wilc->srcu); - list_for_each_entry_rcu(vif, &wilc->vif_list, list) { - if (vif->ndev) - unregister_netdev(vif->ndev); - } - srcu_read_unlock(&wilc->srcu, srcu_idx); - - wilc_wfi_deinit_mon_interface(wilc, false); - destroy_workqueue(wilc->hif_workqueue); - - while (ifc_cnt < WILC_NUM_CONCURRENT_IFC) { + list_for_each_entry_safe(vif, vif_tmp, &wilc->vif_list, list) { mutex_lock(&wilc->vif_mutex); - if (wilc->vif_num <= 0) { - mutex_unlock(&wilc->vif_mutex); - break; - } - vif = wilc_get_wl_to_vif(wilc); - if (!IS_ERR(vif)) - list_del_rcu(&vif->list); - + list_del_rcu(&vif->list); wilc->vif_num--; mutex_unlock(&wilc->vif_mutex); synchronize_srcu(&wilc->srcu); - ifc_cnt++; + if (vif->ndev) + unregister_netdev(vif->ndev); } + wilc_wfi_deinit_mon_interface(wilc, false); + destroy_workqueue(wilc->hif_workqueue); + wilc_wlan_cfg_deinit(wilc); wlan_deinit_locks(wilc); wiphy_unregister(wilc->wiphy); @@ -977,13 +963,6 @@ struct wilc_vif *wilc_netdev_ifc_init(struct wilc *wl, const char *name, goto error; } - wl->hif_workqueue = alloc_ordered_workqueue("%s-wq", WQ_MEM_RECLAIM, - ndev->name); - if (!wl->hif_workqueue) { - ret = -ENOMEM; - goto unregister_netdev; - } - ndev->needs_free_netdev = true; vif->iftype = vif_type; vif->idx = wilc_get_available_idx(wl); @@ -996,12 +975,11 @@ struct wilc_vif *wilc_netdev_ifc_init(struct wilc *wl, const char *name, return vif; -unregister_netdev: +error: if (rtnl_locked) cfg80211_unregister_netdevice(ndev); else unregister_netdev(ndev); - error: free_netdev(ndev); return ERR_PTR(ret); } diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c index b0fc5e68feec..5877e2c1fa0f 100644 --- a/drivers/net/wireless/microchip/wilc1000/spi.c +++ b/drivers/net/wireless/microchip/wilc1000/spi.c @@ -191,11 +191,11 @@ static void wilc_wlan_power(struct wilc *wilc, bool on) /* assert ENABLE: */ gpiod_set_value(gpios->enable, 1); mdelay(5); - /* assert RESET: */ - gpiod_set_value(gpios->reset, 1); - } else { /* deassert RESET: */ gpiod_set_value(gpios->reset, 0); + } else { + /* assert RESET: */ + gpiod_set_value(gpios->reset, 1); /* deassert ENABLE: */ gpiod_set_value(gpios->enable, 0); } diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index 12b700c7b9c3..517d9023aae3 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -8672,7 +8672,7 @@ static void rt2800_rxdcoc_calibration(struct rt2x00_dev *rt2x00dev) rt2800_rfcsr_write_bank(rt2x00dev, 5, 4, saverfb5r4); rt2800_rfcsr_write_bank(rt2x00dev, 7, 4, saverfb7r4); - rt2800_bbp_write(rt2x00dev, 158, 141); + rt2800_bbp_write(rt2x00dev, 158, 140); bbpreg = rt2800_bbp_read(rt2x00dev, 159); bbpreg = bbpreg & (~0x40); rt2800_bbp_write(rt2x00dev, 159, bbpreg); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c index 9a9cfd0ce402..00b945053e19 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c @@ -101,6 +101,7 @@ void rt2x00lib_disable_radio(struct rt2x00_dev *rt2x00dev) rt2x00link_stop_tuner(rt2x00dev); rt2x00queue_stop_queues(rt2x00dev); rt2x00queue_flush_queues(rt2x00dev, true); + rt2x00queue_stop_queue(rt2x00dev->bcn); /* * Disable radio. @@ -1286,6 +1287,7 @@ int rt2x00lib_start(struct rt2x00_dev *rt2x00dev) rt2x00dev->intf_ap_count = 0; rt2x00dev->intf_sta_count = 0; rt2x00dev->intf_associated = 0; + rt2x00dev->intf_beaconing = 0; /* Enable the radio */ retval = rt2x00lib_enable_radio(rt2x00dev); @@ -1312,6 +1314,7 @@ void rt2x00lib_stop(struct rt2x00_dev *rt2x00dev) rt2x00dev->intf_ap_count = 0; rt2x00dev->intf_sta_count = 0; rt2x00dev->intf_associated = 0; + rt2x00dev->intf_beaconing = 0; } static inline void rt2x00lib_set_if_combinations(struct rt2x00_dev *rt2x00dev) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c index 4202c6517783..75fda72c14ca 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c @@ -598,6 +598,17 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw, */ if (changes & BSS_CHANGED_BEACON_ENABLED) { mutex_lock(&intf->beacon_skb_mutex); + + /* + * Clear the 'enable_beacon' flag and clear beacon because + * the beacon queue has been stopped after hardware reset. + */ + if (test_bit(DEVICE_STATE_RESET, &rt2x00dev->flags) && + intf->enable_beacon) { + intf->enable_beacon = false; + rt2x00queue_clear_beacon(rt2x00dev, vif); + } + if (!bss_conf->enable_beacon && intf->enable_beacon) { rt2x00dev->intf_beaconing--; intf->enable_beacon = false; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 3a9fa3ff37ac..ccac47dd781d 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -6542,6 +6542,7 @@ static void rtl8xxxu_stop(struct ieee80211_hw *hw) if (priv->usb_interrupts) rtl8xxxu_write32(priv, REG_USB_HIMR, 0); + cancel_work_sync(&priv->c2hcmd_work); cancel_delayed_work_sync(&priv->ra_watchdog); rtl8xxxu_free_rx_resources(priv); @@ -7030,6 +7031,18 @@ static const struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192eu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x818c, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192eu_fops}, +/* D-Link DWA-131 rev C1 */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3312, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, +/* TP-Link TL-WN8200ND V2 */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x0126, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, +/* Mercusys MW300UM */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2c4e, 0x0100, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, +/* Mercusys MW300UH */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2c4e, 0x0104, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, #endif { } }; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c index fe9b407dc2af..71e29b103da5 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c @@ -49,7 +49,7 @@ u32 rtl8723e_phy_query_rf_reg(struct ieee80211_hw *hw, rfpath, regaddr); } - bitshift = rtl8723_phy_calculate_bit_shift(bitmask); + bitshift = calculate_bit_shift(bitmask); readback_value = (original_value & bitmask) >> bitshift; spin_unlock(&rtlpriv->locks.rf_lock); @@ -80,7 +80,7 @@ void rtl8723e_phy_set_rf_reg(struct ieee80211_hw *hw, original_value = rtl8723_phy_rf_serial_read(hw, rfpath, regaddr); - bitshift = rtl8723_phy_calculate_bit_shift(bitmask); + bitshift = calculate_bit_shift(bitmask); data = ((original_value & (~bitmask)) | (data << bitshift)); @@ -89,7 +89,7 @@ void rtl8723e_phy_set_rf_reg(struct ieee80211_hw *hw, rtl8723_phy_rf_serial_write(hw, rfpath, regaddr, data); } else { if (bitmask != RFREG_OFFSET_MASK) { - bitshift = rtl8723_phy_calculate_bit_shift(bitmask); + bitshift = calculate_bit_shift(bitmask); data = ((original_value & (~bitmask)) | (data << bitshift)); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c index 2b9313cb93db..094cb36153f5 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c @@ -41,7 +41,7 @@ u32 rtl8723be_phy_query_rf_reg(struct ieee80211_hw *hw, enum radio_path rfpath, spin_lock(&rtlpriv->locks.rf_lock); original_value = rtl8723_phy_rf_serial_read(hw, rfpath, regaddr); - bitshift = rtl8723_phy_calculate_bit_shift(bitmask); + bitshift = calculate_bit_shift(bitmask); readback_value = (original_value & bitmask) >> bitshift; spin_unlock(&rtlpriv->locks.rf_lock); @@ -68,7 +68,7 @@ void rtl8723be_phy_set_rf_reg(struct ieee80211_hw *hw, enum radio_path path, if (bitmask != RFREG_OFFSET_MASK) { original_value = rtl8723_phy_rf_serial_read(hw, path, regaddr); - bitshift = rtl8723_phy_calculate_bit_shift(bitmask); + bitshift = calculate_bit_shift(bitmask); data = ((original_value & (~bitmask)) | (data << bitshift)); } diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index 4c8164db4a9e..81f3112923f1 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -1989,8 +1989,6 @@ static int rtw_chip_board_info_setup(struct rtw_dev *rtwdev) rtw_phy_setup_phy_cond(rtwdev, 0); rtw_phy_init_tx_power(rtwdev); - if (rfe_def->agc_btg_tbl) - rtw_load_table(rtwdev, rfe_def->agc_btg_tbl); rtw_load_table(rtwdev, rfe_def->phy_pg_tbl); rtw_load_table(rtwdev, rfe_def->txpwr_lmt_tbl); rtw_phy_tx_power_by_rate_config(hal); diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c index bd7d05e08084..fde7b532bc07 100644 --- a/drivers/net/wireless/realtek/rtw88/phy.c +++ b/drivers/net/wireless/realtek/rtw88/phy.c @@ -1761,12 +1761,15 @@ static void rtw_load_rfk_table(struct rtw_dev *rtwdev) void rtw_phy_load_tables(struct rtw_dev *rtwdev) { + const struct rtw_rfe_def *rfe_def = rtw_get_rfe_def(rtwdev); const struct rtw_chip_info *chip = rtwdev->chip; u8 rf_path; rtw_load_table(rtwdev, chip->mac_tbl); rtw_load_table(rtwdev, chip->bb_tbl); rtw_load_table(rtwdev, chip->agc_tbl); + if (rfe_def->agc_btg_tbl) + rtw_load_table(rtwdev, rfe_def->agc_btg_tbl); rtw_load_rfk_table(rtwdev); for (rf_path = 0; rf_path < rtwdev->hal.rf_path_num; rf_path++) { diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index 609a2b86330d..50e3e46f7d8a 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -674,9 +674,9 @@ static void rtw8821c_false_alarm_statistics(struct rtw_dev *rtwdev) dm_info->cck_fa_cnt = cck_fa_cnt; dm_info->ofdm_fa_cnt = ofdm_fa_cnt; + dm_info->total_fa_cnt = ofdm_fa_cnt; if (cck_enable) dm_info->total_fa_cnt += cck_fa_cnt; - dm_info->total_fa_cnt = ofdm_fa_cnt; crc32_cnt = rtw_read32(rtwdev, REG_CRC_CCK); dm_info->cck_ok_cnt = FIELD_GET(GENMASK(15, 0), crc32_cnt); diff --git a/drivers/net/wireless/silabs/wfx/sta.c b/drivers/net/wireless/silabs/wfx/sta.c index 626dfb4b7a55..871667650dbe 100644 --- a/drivers/net/wireless/silabs/wfx/sta.c +++ b/drivers/net/wireless/silabs/wfx/sta.c @@ -354,29 +354,43 @@ static int wfx_upload_ap_templates(struct wfx_vif *wvif) return 0; } -static void wfx_set_mfp_ap(struct wfx_vif *wvif) +static int wfx_set_mfp_ap(struct wfx_vif *wvif) { struct ieee80211_vif *vif = wvif_to_vif(wvif); struct sk_buff *skb = ieee80211_beacon_get(wvif->wdev->hw, vif, 0); const int ieoffset = offsetof(struct ieee80211_mgmt, u.beacon.variable); - const u16 *ptr = (u16 *)cfg80211_find_ie(WLAN_EID_RSN, skb->data + ieoffset, - skb->len - ieoffset); const int pairwise_cipher_suite_count_offset = 8 / sizeof(u16); const int pairwise_cipher_suite_size = 4 / sizeof(u16); const int akm_suite_size = 4 / sizeof(u16); + int ret = -EINVAL; + const u16 *ptr; - if (ptr) { - ptr += pairwise_cipher_suite_count_offset; - if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) - return; - ptr += 1 + pairwise_cipher_suite_size * *ptr; - if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) - return; - ptr += 1 + akm_suite_size * *ptr; - if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) - return; - wfx_hif_set_mfp(wvif, *ptr & BIT(7), *ptr & BIT(6)); - } + if (unlikely(!skb)) + return -ENOMEM; + + ptr = (u16 *)cfg80211_find_ie(WLAN_EID_RSN, skb->data + ieoffset, + skb->len - ieoffset); + if (unlikely(!ptr)) + goto free_skb; + + ptr += pairwise_cipher_suite_count_offset; + if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) + goto free_skb; + + ptr += 1 + pairwise_cipher_suite_size * *ptr; + if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) + goto free_skb; + + ptr += 1 + akm_suite_size * *ptr; + if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) + goto free_skb; + + wfx_hif_set_mfp(wvif, *ptr & BIT(7), *ptr & BIT(6)); + ret = 0; + +free_skb: + dev_kfree_skb(skb); + return ret; } int wfx_start_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif, @@ -394,8 +408,7 @@ int wfx_start_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif, ret = wfx_hif_start(wvif, &vif->bss_conf, wvif->channel); if (ret > 0) return -EIO; - wfx_set_mfp_ap(wvif); - return ret; + return wfx_set_mfp_ap(wvif); } void wfx_stop_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif, diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 271604098574..0d51c900c553 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -104,13 +104,12 @@ bool provides_xdp_headroom = true; module_param(provides_xdp_headroom, bool, 0644); static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status); + s8 status); static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st); -static void push_tx_responses(struct xenvif_queue *queue); + s8 status); static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); @@ -208,13 +207,9 @@ static void xenvif_tx_err(struct xenvif_queue *queue, unsigned int extra_count, RING_IDX end) { RING_IDX cons = queue->tx.req_cons; - unsigned long flags; do { - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); if (cons == end) break; RING_COPY_REQUEST(&queue->tx, cons++, txp); @@ -465,12 +460,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS; nr_slots--) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); ++txp; continue; } @@ -496,14 +486,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, - flags); continue; } @@ -997,7 +981,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, (ret == 0) ? XEN_NETIF_RSP_OKAY : XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); continue; } @@ -1009,7 +992,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, make_tx_response(queue, &txreq, extra_count, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); continue; } @@ -1444,44 +1426,17 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) return work_done; } -static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status) -{ - struct pending_tx_info *pending_tx_info; - pending_ring_idx_t index; - unsigned long flags; - - pending_tx_info = &queue->pending_tx_info[pending_idx]; - - spin_lock_irqsave(&queue->response_lock, flags); - - make_tx_response(queue, &pending_tx_info->req, - pending_tx_info->extra_count, status); - - /* Release the pending index before pusing the Tx response so - * its available before a new Tx request is pushed by the - * frontend. - */ - index = pending_index(queue->pending_prod++); - queue->pending_ring[index] = pending_idx; - - push_tx_responses(queue); - - spin_unlock_irqrestore(&queue->response_lock, flags); -} - - -static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, +static void _make_tx_response(struct xenvif_queue *queue, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st) + s8 status) { RING_IDX i = queue->tx.rsp_prod_pvt; struct xen_netif_tx_response *resp; resp = RING_GET_RESPONSE(&queue->tx, i); resp->id = txp->id; - resp->status = st; + resp->status = status; while (extra_count-- != 0) RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; @@ -1498,6 +1453,47 @@ static void push_tx_responses(struct xenvif_queue *queue) notify_remote_via_irq(queue->tx_irq); } +static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, + s8 status) +{ + struct pending_tx_info *pending_tx_info; + pending_ring_idx_t index; + unsigned long flags; + + pending_tx_info = &queue->pending_tx_info[pending_idx]; + + spin_lock_irqsave(&queue->response_lock, flags); + + _make_tx_response(queue, &pending_tx_info->req, + pending_tx_info->extra_count, status); + + /* Release the pending index before pusing the Tx response so + * its available before a new Tx request is pushed by the + * frontend. + */ + index = pending_index(queue->pending_prod++); + queue->pending_ring[index] = pending_idx; + + push_tx_responses(queue); + + spin_unlock_irqrestore(&queue->response_lock, flags); +} + +static void make_tx_response(struct xenvif_queue *queue, + const struct xen_netif_tx_request *txp, + unsigned int extra_count, + s8 status) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->response_lock, flags); + + _make_tx_response(queue, txp, extra_count, status); + push_tx_responses(queue); + + spin_unlock_irqrestore(&queue->response_lock, flags); +} + static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) { int ret; diff --git a/drivers/ntb/core.c b/drivers/ntb/core.c index 27dd93deff6e..d702bee78082 100644 --- a/drivers/ntb/core.c +++ b/drivers/ntb/core.c @@ -100,6 +100,8 @@ EXPORT_SYMBOL(ntb_unregister_client); int ntb_register_device(struct ntb_dev *ntb) { + int ret; + if (!ntb) return -EINVAL; if (!ntb->pdev) @@ -120,7 +122,11 @@ int ntb_register_device(struct ntb_dev *ntb) ntb->ctx_ops = NULL; spin_lock_init(&ntb->ctx_lock); - return device_register(&ntb->dev); + ret = device_register(&ntb->dev); + if (ret) + put_device(&ntb->dev); + + return ret; } EXPORT_SYMBOL(ntb_register_device); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0c088db94470..d7516e99275b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4971,7 +4971,8 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, set->ops = ops; set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; if (ctrl->ops->flags & NVME_F_FABRICS) - set->reserved_tags = NVMF_RESERVED_TAGS; + /* Reserved for fabric connect and keep alive */ + set->reserved_tags = 2; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_NO_SCHED; if (ctrl->ops->flags & NVME_F_BLOCKING) @@ -5029,7 +5030,15 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, memset(set, 0, sizeof(*set)); set->ops = ops; set->queue_depth = ctrl->sqsize + 1; - set->reserved_tags = NVMF_RESERVED_TAGS; + /* + * Some Apple controllers requires tags to be unique across admin and + * the (only) I/O queue, so reserve the first 32 tags of the I/O queue. + */ + if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS) + set->reserved_tags = NVME_AQ_DEPTH; + else if (ctrl->ops->flags & NVME_F_FABRICS) + /* Reserved for fabric connect */ + set->reserved_tags = 1; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; if (ctrl->ops->flags & NVME_F_BLOCKING) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index dcac3df8a5f7..60c238caf7a9 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -18,13 +18,6 @@ /* default is -1: the fail fast mechanism is disabled */ #define NVMF_DEF_FAIL_FAST_TMO -1 -/* - * Reserved one command for internal usage. This command is used for sending - * the connect command, as well as for the keep alive command on the admin - * queue once live. - */ -#define NVMF_RESERVED_TAGS 1 - /* * Define a host as seen by the target. We allocate one at boot, but also * allow the override it when creating controllers. This is both to provide diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 177a365b8ec5..3dbf926fd99f 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -221,11 +221,6 @@ static LIST_HEAD(nvme_fc_lport_list); static DEFINE_IDA(nvme_fc_local_port_cnt); static DEFINE_IDA(nvme_fc_ctrl_cnt); -static struct workqueue_struct *nvme_fc_wq; - -static bool nvme_fc_waiting_to_unload; -static DECLARE_COMPLETION(nvme_fc_unload_proceed); - /* * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init. @@ -255,8 +250,6 @@ nvme_fc_free_lport(struct kref *ref) /* remove from transport list */ spin_lock_irqsave(&nvme_fc_lock, flags); list_del(&lport->port_list); - if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list)) - complete(&nvme_fc_unload_proceed); spin_unlock_irqrestore(&nvme_fc_lock, flags); ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num); @@ -3869,10 +3862,6 @@ static int __init nvme_fc_init_module(void) { int ret; - nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0); - if (!nvme_fc_wq) - return -ENOMEM; - /* * NOTE: * It is expected that in the future the kernel will combine @@ -3890,7 +3879,7 @@ static int __init nvme_fc_init_module(void) ret = class_register(&fc_class); if (ret) { pr_err("couldn't register class fc\n"); - goto out_destroy_wq; + return ret; } /* @@ -3914,8 +3903,6 @@ out_destroy_device: device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: class_unregister(&fc_class); -out_destroy_wq: - destroy_workqueue(nvme_fc_wq); return ret; } @@ -3935,45 +3922,23 @@ nvme_fc_delete_controllers(struct nvme_fc_rport *rport) spin_unlock(&rport->lock); } -static void -nvme_fc_cleanup_for_unload(void) +static void __exit nvme_fc_exit_module(void) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; - - list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { - list_for_each_entry(rport, &lport->endp_list, endp_list) { - nvme_fc_delete_controllers(rport); - } - } -} - -static void __exit nvme_fc_exit_module(void) -{ unsigned long flags; - bool need_cleanup = false; spin_lock_irqsave(&nvme_fc_lock, flags); - nvme_fc_waiting_to_unload = true; - if (!list_empty(&nvme_fc_lport_list)) { - need_cleanup = true; - nvme_fc_cleanup_for_unload(); - } + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) + list_for_each_entry(rport, &lport->endp_list, endp_list) + nvme_fc_delete_controllers(rport); spin_unlock_irqrestore(&nvme_fc_lock, flags); - if (need_cleanup) { - pr_info("%s: waiting for ctlr deletes\n", __func__); - wait_for_completion(&nvme_fc_unload_proceed); - pr_info("%s: ctrl deletes complete\n", __func__); - } + flush_workqueue(nvme_delete_wq); nvmf_unregister_transport(&nvme_fc_transport); - ida_destroy(&nvme_fc_local_port_cnt); - ida_destroy(&nvme_fc_ctrl_cnt); - device_destroy(&fc_class, MKDEV(0, 0)); class_unregister(&fc_class); - destroy_workqueue(nvme_fc_wq); } module_init(nvme_fc_init_module); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1ab6601fdd5c..8a02ed63b156 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -111,6 +111,8 @@ struct nvmet_fc_tgtport { struct nvmet_fc_port_entry *pe; struct kref ref; u32 max_sg_cnt; + + struct work_struct put_work; }; struct nvmet_fc_port_entry { @@ -165,7 +167,7 @@ struct nvmet_fc_tgt_assoc { struct nvmet_fc_hostport *hostport; struct nvmet_fc_ls_iod *rcv_disconn; struct list_head a_list; - struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1]; + struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; struct rcu_head rcu; @@ -248,6 +250,13 @@ static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue); static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); +static void nvmet_fc_put_tgtport_work(struct work_struct *work) +{ + struct nvmet_fc_tgtport *tgtport = + container_of(work, struct nvmet_fc_tgtport, put_work); + + nvmet_fc_tgtport_put(tgtport); +} static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod); @@ -359,7 +368,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - return; + goto out_putwork; } list_del(&lsop->lsreq_list); @@ -372,7 +381,8 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); - nvmet_fc_tgtport_put(tgtport); +out_putwork: + queue_work(nvmet_wq, &tgtport->put_work); } static int @@ -801,14 +811,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, if (!queue) return NULL; - if (!nvmet_fc_tgt_a_get(assoc)) - goto out_free_queue; - queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0, assoc->tgtport->fc_target_port.port_num, assoc->a_id, qid); if (!queue->work_q) - goto out_a_put; + goto out_free_queue; queue->qid = qid; queue->sqsize = sqsize; @@ -830,15 +837,13 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, goto out_fail_iodlist; WARN_ON(assoc->queues[qid]); - rcu_assign_pointer(assoc->queues[qid], queue); + assoc->queues[qid] = queue; return queue; out_fail_iodlist: nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue); destroy_workqueue(queue->work_q); -out_a_put: - nvmet_fc_tgt_a_put(assoc); out_free_queue: kfree(queue); return NULL; @@ -851,12 +856,8 @@ nvmet_fc_tgt_queue_free(struct kref *ref) struct nvmet_fc_tgt_queue *queue = container_of(ref, struct nvmet_fc_tgt_queue, ref); - rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL); - nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue); - nvmet_fc_tgt_a_put(queue->assoc); - destroy_workqueue(queue->work_q); kfree_rcu(queue, rcu); @@ -968,7 +969,7 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (association_id == assoc->association_id) { - queue = rcu_dereference(assoc->queues[qid]); + queue = assoc->queues[qid]; if (queue && (!atomic_read(&queue->connected) || !nvmet_fc_tgt_q_get(queue))) @@ -1077,8 +1078,6 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) /* new allocation not needed */ kfree(newhost); newhost = match; - /* no new allocation - release reference */ - nvmet_fc_tgtport_put(tgtport); } else { newhost->tgtport = tgtport; newhost->hosthandle = hosthandle; @@ -1093,13 +1092,28 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) } static void -nvmet_fc_delete_assoc(struct work_struct *work) +nvmet_fc_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); +} + +static void +nvmet_fc_delete_assoc_work(struct work_struct *work) { struct nvmet_fc_tgt_assoc *assoc = container_of(work, struct nvmet_fc_tgt_assoc, del_work); + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_delete_assoc(assoc); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_tgtport_get(assoc->tgtport); + queue_work(nvmet_wq, &assoc->del_work); } static struct nvmet_fc_tgt_assoc * @@ -1111,6 +1125,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) int idx; bool needrandom = true; + if (!tgtport->pe) + return NULL; + assoc = kzalloc(sizeof(*assoc), GFP_KERNEL); if (!assoc) return NULL; @@ -1130,7 +1147,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); - INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); + INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc_work); atomic_set(&assoc->terminating, 0); while (needrandom) { @@ -1171,13 +1188,18 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; struct nvmet_fc_ls_iod *oldls; unsigned long flags; + int i; + + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + nvmet_fc_delete_target_queue(assoc->queues[i]); + } /* Send Disconnect now that all i/o has completed */ nvmet_fc_xmt_disconnect_assoc(assoc); nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); - list_del_rcu(&assoc->a_list); oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); /* if pending Rcv Disconnect Association LS, send rsp now */ @@ -1207,7 +1229,7 @@ static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) { struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - struct nvmet_fc_tgt_queue *queue; + unsigned long flags; int i, terminating; terminating = atomic_xchg(&assoc->terminating, 1); @@ -1216,29 +1238,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) if (terminating) return; + spin_lock_irqsave(&tgtport->lock, flags); + list_del_rcu(&assoc->a_list); + spin_unlock_irqrestore(&tgtport->lock, flags); + synchronize_rcu(); + + /* ensure all in-flight I/Os have been processed */ for (i = NVMET_NR_QUEUES; i >= 0; i--) { - rcu_read_lock(); - queue = rcu_dereference(assoc->queues[i]); - if (!queue) { - rcu_read_unlock(); - continue; - } - - if (!nvmet_fc_tgt_q_get(queue)) { - rcu_read_unlock(); - continue; - } - rcu_read_unlock(); - nvmet_fc_delete_target_queue(queue); - nvmet_fc_tgt_q_put(queue); + if (assoc->queues[i]) + flush_workqueue(assoc->queues[i]->work_q); } dev_info(tgtport->dev, "{%d:%d} Association deleted\n", tgtport->fc_target_port.port_num, assoc->a_id); - - nvmet_fc_tgt_a_put(assoc); } static struct nvmet_fc_tgt_assoc * @@ -1414,6 +1428,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); newrec->max_sg_cnt = template->max_sgl_segments; + INIT_WORK(&newrec->put_work, nvmet_fc_put_tgtport_work); ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { @@ -1491,9 +1506,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); } @@ -1546,9 +1560,8 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1580,7 +1593,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { - queue = rcu_dereference(assoc->queues[0]); + queue = assoc->queues[0]; if (queue && queue->nvme_sq.ctrl == ctrl) { if (nvmet_fc_tgt_a_get(assoc)) found_ctrl = true; @@ -1592,9 +1605,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); return; } @@ -1624,6 +1636,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); + flush_workqueue(nvmet_wq); + /* * should terminate LS's as well. However, LS's will be generated * at the tail end of association termination, so they likely don't @@ -1869,9 +1883,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, sizeof(struct fcnvme_ls_disconnect_assoc_acc)), FCNVME_LS_DISCONNECT_ASSOC); - /* release get taken in nvmet_fc_find_target_assoc */ - nvmet_fc_tgt_a_put(assoc); - /* * The rules for LS response says the response cannot * go back until ABTS's have been sent for all outstanding @@ -1886,8 +1897,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, assoc->rcv_disconn = iod; spin_unlock_irqrestore(&tgtport->lock, flags); - nvmet_fc_delete_target_assoc(assoc); - if (oldls) { dev_info(tgtport->dev, "{%d:%d} Multiple Disconnect Association LS's " @@ -1903,6 +1912,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); + return false; } @@ -2539,8 +2551,9 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.cmd = &fod->cmdiubuf.sqe; fod->req.cqe = &fod->rspiubuf.cqe; - if (tgtport->pe) - fod->req.port = tgtport->pe->port; + if (!tgtport->pe) + goto transport_error; + fod->req.port = tgtport->pe->port; /* clear any response payload */ memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); @@ -2901,6 +2914,9 @@ nvmet_fc_remove_port(struct nvmet_port *port) nvmet_fc_portentry_unbind(pe); + /* terminate any outstanding associations */ + __nvmet_fc_free_assocs(pe->tgtport); + kfree(pe); } @@ -2932,6 +2948,9 @@ static int __init nvmet_fc_init_module(void) static void __exit nvmet_fc_exit_module(void) { + /* ensure any shutdown operation, e.g. delete ctrls have finished */ + flush_workqueue(nvmet_wq); + /* sanity check - all lports should be removed */ if (!list_empty(&nvmet_fc_target_list)) pr_warn("%s: targetport list not empty\n", __func__); diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c780af36c1d4..f5b8442b653d 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -358,7 +358,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, if (!rport->targetport) { tls_req->status = -ECONNREFUSED; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); return ret; @@ -391,7 +391,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, if (remoteport) { rport = remoteport->private; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); } @@ -446,7 +446,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, if (!tport->remoteport) { tls_req->status = -ECONNREFUSED; spin_lock(&tport->lock); - list_add_tail(&tport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &tport->ls_list); spin_unlock(&tport->lock); queue_work(nvmet_wq, &tport->ls_work); return ret; diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index ce42afe8f64e..348076827469 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1884,6 +1884,7 @@ static void __exit nvmet_tcp_exit(void) flush_workqueue(nvmet_wq); destroy_workqueue(nvmet_tcp_wq); + ida_destroy(&nvmet_tcp_queue_ida); } module_init(nvmet_tcp_init); diff --git a/drivers/nvmem/meson-efuse.c b/drivers/nvmem/meson-efuse.c index d6b533497ce1..ba2714bef8d0 100644 --- a/drivers/nvmem/meson-efuse.c +++ b/drivers/nvmem/meson-efuse.c @@ -47,7 +47,6 @@ static int meson_efuse_probe(struct platform_device *pdev) struct nvmem_config *econfig; struct clk *clk; unsigned int size; - int ret; sm_np = of_parse_phandle(pdev->dev.of_node, "secure-monitor", 0); if (!sm_np) { @@ -60,27 +59,9 @@ static int meson_efuse_probe(struct platform_device *pdev) if (!fw) return -EPROBE_DEFER; - clk = devm_clk_get(dev, NULL); - if (IS_ERR(clk)) { - ret = PTR_ERR(clk); - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to get efuse gate"); - return ret; - } - - ret = clk_prepare_enable(clk); - if (ret) { - dev_err(dev, "failed to enable gate"); - return ret; - } - - ret = devm_add_action_or_reset(dev, - (void(*)(void *))clk_disable_unprepare, - clk); - if (ret) { - dev_err(dev, "failed to add disable callback"); - return ret; - } + clk = devm_clk_get_enabled(dev, NULL); + if (IS_ERR(clk)) + return dev_err_probe(dev, PTR_ERR(clk), "failed to get efuse gate"); if (meson_sm_call(fw, SM_EFUSE_USER_MAX, &size, 0, 0, 0, 0, 0) < 0) { dev_err(dev, "failed to get max user"); diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 4402871b5c0c..e663d5585a05 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -45,8 +45,8 @@ struct target { /** * struct fragment - info about fragment nodes in overlay expanded device tree - * @target: target of the overlay operation * @overlay: pointer to the __overlay__ node + * @target: target of the overlay operation */ struct fragment { struct device_node *overlay; diff --git a/drivers/of/property.c b/drivers/of/property.c index b636777e6f7c..da5d71219770 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -762,7 +762,9 @@ struct device_node *of_graph_get_port_parent(struct device_node *node) /* Walk 3 levels up only if there is 'ports' node. */ for (depth = 3; depth && node; depth--) { node = of_get_next_parent(node); - if (depth == 2 && !of_node_name_eq(node, "ports")) + if (depth == 2 && !of_node_name_eq(node, "ports") && + !of_node_name_eq(node, "in-ports") && + !of_node_name_eq(node, "out-ports")) break; } return node; @@ -1243,7 +1245,7 @@ DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells") DEFINE_SIMPLE_PROP(interconnects, "interconnects", "#interconnect-cells") DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells") DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells") -DEFINE_SIMPLE_PROP(io_channels, "io-channel", "#io-channel-cells") +DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells") DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL) DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells") DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells") @@ -1261,7 +1263,6 @@ DEFINE_SIMPLE_PROP(pinctrl5, "pinctrl-5", NULL) DEFINE_SIMPLE_PROP(pinctrl6, "pinctrl-6", NULL) DEFINE_SIMPLE_PROP(pinctrl7, "pinctrl-7", NULL) DEFINE_SIMPLE_PROP(pinctrl8, "pinctrl-8", NULL) -DEFINE_SIMPLE_PROP(remote_endpoint, "remote-endpoint", NULL) DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells") DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) @@ -1326,6 +1327,17 @@ static struct device_node *parse_interrupts(struct device_node *np, return of_irq_parse_one(np, index, &sup_args) ? NULL : sup_args.np; } +static struct device_node *parse_remote_endpoint(struct device_node *np, + const char *prop_name, + int index) +{ + /* Return NULL for index > 0 to signify end of remote-endpoints. */ + if (index > 0 || strcmp(prop_name, "remote-endpoint")) + return NULL; + + return of_graph_get_remote_port_parent(np); +} + static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_clocks, }, { .parse_prop = parse_interconnects, }, diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index e541a8960f1d..ce1386074e66 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -49,6 +49,12 @@ static struct unittest_results { failed; \ }) +#ifdef CONFIG_OF_KOBJ +#define OF_KREF_READ(NODE) kref_read(&(NODE)->kobj.kref) +#else +#define OF_KREF_READ(NODE) 1 +#endif + /* * Expected message may have a message level other than KERN_INFO. * Print the expected message only if the current loglevel will allow @@ -562,7 +568,7 @@ static void __init of_unittest_parse_phandle_with_args_map(void) pr_err("missing testcase data\n"); return; } - prefs[i] = kref_read(&p[i]->kobj.kref); + prefs[i] = OF_KREF_READ(p[i]); } rc = of_count_phandle_with_args(np, "phandle-list", "#phandle-cells"); @@ -685,9 +691,9 @@ static void __init of_unittest_parse_phandle_with_args_map(void) unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); for (i = 0; i < ARRAY_SIZE(p); ++i) { - unittest(prefs[i] == kref_read(&p[i]->kobj.kref), + unittest(prefs[i] == OF_KREF_READ(p[i]), "provider%d: expected:%d got:%d\n", - i, prefs[i], kref_read(&p[i]->kobj.kref)); + i, prefs[i], OF_KREF_READ(p[i])); of_node_put(p[i]); } } diff --git a/drivers/opp/core.c b/drivers/opp/core.c index f0d70ecc0271..71d3e3ba909a 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1226,12 +1226,12 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) * value of the frequency. In such a case, do not abort but * configure the hardware to the desired frequency forcefully. */ - forced = opp_table->rate_clk_single != target_freq; + forced = opp_table->rate_clk_single != freq; } - ret = _set_opp(dev, opp_table, opp, &target_freq, forced); + ret = _set_opp(dev, opp_table, opp, &freq, forced); - if (target_freq) + if (freq) dev_pm_opp_put(opp); put_opp_table: diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index 8f7f19797bc2..2285308bf4b8 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -37,10 +37,12 @@ static ssize_t bw_name_read(struct file *fp, char __user *userbuf, size_t count, loff_t *ppos) { struct icc_path *path = fp->private_data; + const char *name = icc_get_name(path); char buf[64]; - int i; + int i = 0; - i = scnprintf(buf, sizeof(buf), "%.62s\n", icc_get_name(path)); + if (name) + i = scnprintf(buf, sizeof(buf), "%.62s\n", name); return simple_read_from_buffer(userbuf, count, ppos, buf, i); } diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c index 332bcc0053a5..498bae2e3403 100644 --- a/drivers/parisc/power.c +++ b/drivers/parisc/power.c @@ -238,7 +238,7 @@ static int __init power_init(void) if (running_on_qemu && soft_power_reg) register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, SYS_OFF_PRIO_DEFAULT, qemu_power_off, (void *)soft_power_reg); - else + if (!running_on_qemu || soft_power_reg) power_task = kthread_run(kpowerswd, (void*)soft_power_reg, KTHREAD_NAME); if (IS_ERR(power_task)) { diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c index ad99707b3b99..dd7d74fecc48 100644 --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c @@ -18,6 +18,20 @@ #include "pcie-designware.h" +#define PEX_PF0_CONFIG 0xC0014 +#define PEX_PF0_CFG_READY BIT(0) + +/* PEX PFa PCIE PME and message interrupt registers*/ +#define PEX_PF0_PME_MES_DR 0xC0020 +#define PEX_PF0_PME_MES_DR_LUD BIT(7) +#define PEX_PF0_PME_MES_DR_LDD BIT(9) +#define PEX_PF0_PME_MES_DR_HRD BIT(10) + +#define PEX_PF0_PME_MES_IER 0xC0028 +#define PEX_PF0_PME_MES_IER_LUDIE BIT(7) +#define PEX_PF0_PME_MES_IER_LDDIE BIT(9) +#define PEX_PF0_PME_MES_IER_HRDIE BIT(10) + #define to_ls_pcie_ep(x) dev_get_drvdata((x)->dev) struct ls_pcie_ep_drvdata { @@ -30,8 +44,99 @@ struct ls_pcie_ep { struct dw_pcie *pci; struct pci_epc_features *ls_epc; const struct ls_pcie_ep_drvdata *drvdata; + int irq; + u32 lnkcap; + bool big_endian; }; +static u32 ls_lut_readl(struct ls_pcie_ep *pcie, u32 offset) +{ + struct dw_pcie *pci = pcie->pci; + + if (pcie->big_endian) + return ioread32be(pci->dbi_base + offset); + else + return ioread32(pci->dbi_base + offset); +} + +static void ls_lut_writel(struct ls_pcie_ep *pcie, u32 offset, u32 value) +{ + struct dw_pcie *pci = pcie->pci; + + if (pcie->big_endian) + iowrite32be(value, pci->dbi_base + offset); + else + iowrite32(value, pci->dbi_base + offset); +} + +static irqreturn_t ls_pcie_ep_event_handler(int irq, void *dev_id) +{ + struct ls_pcie_ep *pcie = dev_id; + struct dw_pcie *pci = pcie->pci; + u32 val, cfg; + u8 offset; + + val = ls_lut_readl(pcie, PEX_PF0_PME_MES_DR); + ls_lut_writel(pcie, PEX_PF0_PME_MES_DR, val); + + if (!val) + return IRQ_NONE; + + if (val & PEX_PF0_PME_MES_DR_LUD) { + + offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + + /* + * The values of the Maximum Link Width and Supported Link + * Speed from the Link Capabilities Register will be lost + * during link down or hot reset. Restore initial value + * that configured by the Reset Configuration Word (RCW). + */ + dw_pcie_dbi_ro_wr_en(pci); + dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, pcie->lnkcap); + dw_pcie_dbi_ro_wr_dis(pci); + + cfg = ls_lut_readl(pcie, PEX_PF0_CONFIG); + cfg |= PEX_PF0_CFG_READY; + ls_lut_writel(pcie, PEX_PF0_CONFIG, cfg); + dw_pcie_ep_linkup(&pci->ep); + + dev_dbg(pci->dev, "Link up\n"); + } else if (val & PEX_PF0_PME_MES_DR_LDD) { + dev_dbg(pci->dev, "Link down\n"); + } else if (val & PEX_PF0_PME_MES_DR_HRD) { + dev_dbg(pci->dev, "Hot reset\n"); + } + + return IRQ_HANDLED; +} + +static int ls_pcie_ep_interrupt_init(struct ls_pcie_ep *pcie, + struct platform_device *pdev) +{ + u32 val; + int ret; + + pcie->irq = platform_get_irq_byname(pdev, "pme"); + if (pcie->irq < 0) + return pcie->irq; + + ret = devm_request_irq(&pdev->dev, pcie->irq, ls_pcie_ep_event_handler, + IRQF_SHARED, pdev->name, pcie); + if (ret) { + dev_err(&pdev->dev, "Can't register PCIe IRQ\n"); + return ret; + } + + /* Enable interrupts */ + val = ls_lut_readl(pcie, PEX_PF0_PME_MES_IER); + val |= PEX_PF0_PME_MES_IER_LDDIE | PEX_PF0_PME_MES_IER_HRDIE | + PEX_PF0_PME_MES_IER_LUDIE; + ls_lut_writel(pcie, PEX_PF0_PME_MES_IER, val); + + return 0; +} + static const struct pci_epc_features* ls_pcie_ep_get_features(struct dw_pcie_ep *ep) { @@ -124,6 +229,8 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) struct ls_pcie_ep *pcie; struct pci_epc_features *ls_epc; struct resource *dbi_base; + u8 offset; + int ret; pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) @@ -143,6 +250,7 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) pci->ops = pcie->drvdata->dw_pcie_ops; ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4); + ls_epc->linkup_notifier = true; pcie->pci = pci; pcie->ls_epc = ls_epc; @@ -154,9 +262,18 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) pci->ep.ops = &ls_pcie_ep_ops; + pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian"); + platform_set_drvdata(pdev, pcie); - return dw_pcie_ep_init(&pci->ep); + offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + pcie->lnkcap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP); + + ret = dw_pcie_ep_init(&pci->ep); + if (ret) + return ret; + + return ls_pcie_ep_interrupt_init(pcie, pdev); } static struct platform_driver ls_pcie_ep_driver = { diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 59c164b5c64a..506d6d061d4c 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -6,6 +6,7 @@ * Author: Kishon Vijay Abraham I */ +#include #include #include @@ -600,7 +601,7 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no, } aligned_offset = msg_addr & (epc->mem->window.page_size - 1); - msg_addr &= ~aligned_offset; + msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size); ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, epc->mem->window.page_size); if (ret) @@ -668,8 +669,13 @@ int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep) nbars = (reg & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT; + /* + * PCIe r6.0, sec 7.8.6.2 require us to support at least one + * size in the range from 1 MB to 512 GB. Advertise support + * for 1 MB BAR size only. + */ for (i = 0; i < nbars; i++, offset += PCI_REBAR_CTRL) - dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 0x0); + dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, BIT(4)); } dw_pcie_setup(pci); diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 0ccd92faf078..0bad23ec53ee 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -50,6 +50,7 @@ #define PARF_SLV_ADDR_SPACE_SIZE 0x358 #define PARF_DEVICE_TYPE 0x1000 #define PARF_BDF_TO_SID_TABLE_N 0x2000 +#define PARF_BDF_TO_SID_CFG 0x2c00 /* ELBI registers */ #define ELBI_SYS_CTRL 0x04 @@ -102,6 +103,9 @@ /* PARF_DEVICE_TYPE register fields */ #define DEVICE_TYPE_RC 0x4 +/* PARF_BDF_TO_SID_CFG fields */ +#define BDF_TO_SID_BYPASS BIT(0) + /* ELBI_SYS_CTRL register fields */ #define ELBI_SYS_CTRL_LT_ENABLE BIT(0) @@ -1312,6 +1316,82 @@ static void qcom_pcie_deinit_2_7_0(struct qcom_pcie *pcie) regulator_bulk_disable(ARRAY_SIZE(res->supplies), res->supplies); } +static int qcom_pcie_config_sid_1_9_0(struct qcom_pcie *pcie) +{ + /* iommu map structure */ + struct { + u32 bdf; + u32 phandle; + u32 smmu_sid; + u32 smmu_sid_len; + } *map; + void __iomem *bdf_to_sid_base = pcie->parf + PARF_BDF_TO_SID_TABLE_N; + struct device *dev = pcie->pci->dev; + u8 qcom_pcie_crc8_table[CRC8_TABLE_SIZE]; + int i, nr_map, size = 0; + u32 smmu_sid_base; + u32 val; + + of_get_property(dev->of_node, "iommu-map", &size); + if (!size) + return 0; + + /* Enable BDF to SID translation by disabling bypass mode (default) */ + val = readl(pcie->parf + PARF_BDF_TO_SID_CFG); + val &= ~BDF_TO_SID_BYPASS; + writel(val, pcie->parf + PARF_BDF_TO_SID_CFG); + + map = kzalloc(size, GFP_KERNEL); + if (!map) + return -ENOMEM; + + of_property_read_u32_array(dev->of_node, "iommu-map", (u32 *)map, + size / sizeof(u32)); + + nr_map = size / (sizeof(*map)); + + crc8_populate_msb(qcom_pcie_crc8_table, QCOM_PCIE_CRC8_POLYNOMIAL); + + /* Registers need to be zero out first */ + memset_io(bdf_to_sid_base, 0, CRC8_TABLE_SIZE * sizeof(u32)); + + /* Extract the SMMU SID base from the first entry of iommu-map */ + smmu_sid_base = map[0].smmu_sid; + + /* Look for an available entry to hold the mapping */ + for (i = 0; i < nr_map; i++) { + __be16 bdf_be = cpu_to_be16(map[i].bdf); + u32 val; + u8 hash; + + hash = crc8(qcom_pcie_crc8_table, (u8 *)&bdf_be, sizeof(bdf_be), 0); + + val = readl(bdf_to_sid_base + hash * sizeof(u32)); + + /* If the register is already populated, look for next available entry */ + while (val) { + u8 current_hash = hash++; + u8 next_mask = 0xff; + + /* If NEXT field is NULL then update it with next hash */ + if (!(val & next_mask)) { + val |= (u32)hash; + writel(val, bdf_to_sid_base + current_hash * sizeof(u32)); + } + + val = readl(bdf_to_sid_base + hash * sizeof(u32)); + } + + /* BDF [31:16] | SID [15:8] | NEXT [7:0] */ + val = map[i].bdf << 16 | (map[i].smmu_sid - smmu_sid_base) << 8 | 0; + writel(val, bdf_to_sid_base + hash * sizeof(u32)); + } + + kfree(map); + + return 0; +} + static int qcom_pcie_get_resources_2_9_0(struct qcom_pcie *pcie) { struct qcom_pcie_resources_2_9_0 *res = &pcie->res.v2_9_0; @@ -1429,77 +1509,6 @@ static int qcom_pcie_link_up(struct dw_pcie *pci) return !!(val & PCI_EXP_LNKSTA_DLLLA); } -static int qcom_pcie_config_sid_sm8250(struct qcom_pcie *pcie) -{ - /* iommu map structure */ - struct { - u32 bdf; - u32 phandle; - u32 smmu_sid; - u32 smmu_sid_len; - } *map; - void __iomem *bdf_to_sid_base = pcie->parf + PARF_BDF_TO_SID_TABLE_N; - struct device *dev = pcie->pci->dev; - u8 qcom_pcie_crc8_table[CRC8_TABLE_SIZE]; - int i, nr_map, size = 0; - u32 smmu_sid_base; - - of_get_property(dev->of_node, "iommu-map", &size); - if (!size) - return 0; - - map = kzalloc(size, GFP_KERNEL); - if (!map) - return -ENOMEM; - - of_property_read_u32_array(dev->of_node, - "iommu-map", (u32 *)map, size / sizeof(u32)); - - nr_map = size / (sizeof(*map)); - - crc8_populate_msb(qcom_pcie_crc8_table, QCOM_PCIE_CRC8_POLYNOMIAL); - - /* Registers need to be zero out first */ - memset_io(bdf_to_sid_base, 0, CRC8_TABLE_SIZE * sizeof(u32)); - - /* Extract the SMMU SID base from the first entry of iommu-map */ - smmu_sid_base = map[0].smmu_sid; - - /* Look for an available entry to hold the mapping */ - for (i = 0; i < nr_map; i++) { - __be16 bdf_be = cpu_to_be16(map[i].bdf); - u32 val; - u8 hash; - - hash = crc8(qcom_pcie_crc8_table, (u8 *)&bdf_be, sizeof(bdf_be), - 0); - - val = readl(bdf_to_sid_base + hash * sizeof(u32)); - - /* If the register is already populated, look for next available entry */ - while (val) { - u8 current_hash = hash++; - u8 next_mask = 0xff; - - /* If NEXT field is NULL then update it with next hash */ - if (!(val & next_mask)) { - val |= (u32)hash; - writel(val, bdf_to_sid_base + current_hash * sizeof(u32)); - } - - val = readl(bdf_to_sid_base + hash * sizeof(u32)); - } - - /* BDF [31:16] | SID [15:8] | NEXT [7:0] */ - val = map[i].bdf << 16 | (map[i].smmu_sid - smmu_sid_base) << 8 | 0; - writel(val, bdf_to_sid_base + hash * sizeof(u32)); - } - - kfree(map); - - return 0; -} - static int qcom_pcie_host_init(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); @@ -1616,7 +1625,7 @@ static const struct qcom_pcie_ops ops_1_9_0 = { .init = qcom_pcie_init_2_7_0, .deinit = qcom_pcie_deinit_2_7_0, .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable, - .config_sid = qcom_pcie_config_sid_sm8250, + .config_sid = qcom_pcie_config_sid_1_9_0, }; /* Qcom IP rev.: 2.9.0 Synopsys IP rev.: 5.00a */ diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 9693bab59bf7..b36cbc9136ae 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -49,6 +49,7 @@ #include #include #include +#include #include /* @@ -465,7 +466,7 @@ struct pci_eject_response { u32 status; } __packed; -static int pci_ring_size = (4 * PAGE_SIZE); +static int pci_ring_size = VMBUS_RING_SIZE(SZ_16K); /* * Driver specific state. diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index 8c6931210ac4..b4c1a4f6029d 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -1281,14 +1281,11 @@ static int pci_vntb_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = ntb_register_device(&ndev->ntb); if (ret) { dev_err(dev, "Failed to register NTB device\n"); - goto err_register_dev; + return ret; } dev_dbg(dev, "PCI Virtual NTB driver loaded\n"); return 0; - -err_register_dev: - return -EINVAL; } static struct pci_device_id pci_vntb_table[] = { diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index e9cf318e6670..34877a1f43a1 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -60,7 +60,7 @@ static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc) return (irq_hw_number_t)desc->msi_index | pci_dev_id(dev) << 11 | - (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27; + ((irq_hw_number_t)(pci_domain_nr(dev->bus) & 0xFFFFFFFF)) << 27; } static inline bool pci_msi_desc_is_multi_msi(struct msi_desc *desc) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index f47a3b10bf50..8dda3b205dfd 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -473,6 +473,13 @@ static void pci_device_remove(struct device *dev) if (drv->remove) { pm_runtime_get_sync(dev); + /* + * If the driver provides a .runtime_idle() callback and it has + * started to run already, it may continue to run in parallel + * with the code below, so wait until all of the runtime PM + * activity has completed. + */ + pm_runtime_barrier(dev); drv->remove(pci_dev); pm_runtime_put_noidle(dev); } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index ed6d75d138c7..9950deeb047a 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -274,7 +274,7 @@ void pci_bus_put(struct pci_bus *bus); /* PCIe speed to Mb/s reduced by encoding overhead */ #define PCIE_SPEED2MBS_ENC(speed) \ - ((speed) == PCIE_SPEED_64_0GT ? 64000*128/130 : \ + ((speed) == PCIE_SPEED_64_0GT ? 64000*1/1 : \ (speed) == PCIE_SPEED_32_0GT ? 32000*128/130 : \ (speed) == PCIE_SPEED_16_0GT ? 16000*128/130 : \ (speed) == PCIE_SPEED_8_0GT ? 8000*128/130 : \ @@ -357,11 +357,6 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) return 0; } -static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) -{ - return dev->error_state == pci_channel_io_perm_failure; -} - /* pci_dev priv_flags */ #define PCI_DEV_ADDED 0 #define PCI_DPC_RECOVERED 1 diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index e2d8a74f83c3..5426f450ce91 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -748,7 +748,7 @@ static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) u8 bus = info->id >> 8; u8 devfn = info->id & 0xff; - pci_info(dev, "%s%s error received: %04x:%02x:%02x.%d\n", + pci_info(dev, "%s%s error message received from %04x:%02x:%02x.%d\n", info->multi_error_valid ? "Multiple " : "", aer_error_severity_string[info->severity], pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), @@ -936,7 +936,12 @@ static bool find_source_device(struct pci_dev *parent, pci_walk_bus(parent->subordinate, find_device_iter, e_info); if (!e_info->error_dev_num) { - pci_info(parent, "can't find device of ID%04x\n", e_info->id); + u8 bus = e_info->id >> 8; + u8 devfn = e_info->id & 0xff; + + pci_info(parent, "found no error details for %04x:%02x:%02x.%d\n", + pci_domain_nr(parent->bus), bus, PCI_SLOT(devfn), + PCI_FUNC(devfn)); return false; } return true; diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index a5d7c69b764e..08800282825e 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -231,7 +231,7 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev) for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) { pci_read_config_dword(pdev, - cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, &prefix); + cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG + i * 4, &prefix); pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix); } clear_status: diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 59c90d04a609..705893b5f7b0 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -13,6 +13,7 @@ #define dev_fmt(fmt) "AER: " fmt #include +#include #include #include #include @@ -85,6 +86,18 @@ static int report_error_detected(struct pci_dev *dev, return 0; } +static int pci_pm_runtime_get_sync(struct pci_dev *pdev, void *data) +{ + pm_runtime_get_sync(&pdev->dev); + return 0; +} + +static int pci_pm_runtime_put(struct pci_dev *pdev, void *data) +{ + pm_runtime_put(&pdev->dev); + return 0; +} + static int report_frozen_detected(struct pci_dev *dev, void *data) { return report_error_detected(dev, pci_channel_io_frozen, data); @@ -207,6 +220,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, else bridge = pci_upstream_bridge(dev); + pci_walk_bridge(bridge, pci_pm_runtime_get_sync, NULL); + pci_dbg(bridge, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { pci_walk_bridge(bridge, report_frozen_detected, &status); @@ -251,10 +266,15 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pcie_clear_device_status(dev); pci_aer_clear_nonfatal_status(dev); } + + pci_walk_bridge(bridge, pci_pm_runtime_put, NULL); + pci_info(bridge, "device recovery successful\n"); return status; failed: + pci_walk_bridge(bridge, pci_pm_runtime_put, NULL); + pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); /* TODO: Should kernel panic here? */ diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index d8c0d40f8a57..c1002f676415 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -607,10 +607,13 @@ static void quirk_amd_dwc_class(struct pci_dev *pdev) { u32 class = pdev->class; - /* Use "USB Device (not host controller)" class */ - pdev->class = PCI_CLASS_SERIAL_USB_DEVICE; - pci_info(pdev, "PCI class overridden (%#08x -> %#08x) so dwc3 driver can claim this instead of xhci\n", - class, pdev->class); + if (class != PCI_CLASS_SERIAL_USB_DEVICE) { + /* Use "USB Device (not host controller)" class */ + pdev->class = PCI_CLASS_SERIAL_USB_DEVICE; + pci_info(pdev, + "PCI class overridden (%#08x -> %#08x) so dwc3 driver can claim this instead of xhci\n", + class, pdev->class); + } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB, quirk_amd_dwc_class); @@ -3698,6 +3701,19 @@ static void quirk_no_pm_reset(struct pci_dev *dev) DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, 8, quirk_no_pm_reset); +/* + * Spectrum-{1,2,3,4} devices report that a D3hot->D0 transition causes a reset + * (i.e., they advertise NoSoftRst-). However, this transition does not have + * any effect on the device: It continues to be operational and network ports + * remain up. Advertising this support makes it seem as if a PM reset is viable + * for these devices. Mark it as unavailable to skip it when testing reset + * methods. + */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcb84, quirk_no_pm_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf6c, quirk_no_pm_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf70, quirk_no_pm_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf80, quirk_no_pm_reset); + /* * Thunderbolt controllers with broken MSI hotplug signaling: * Entire 1st generation (Light Ridge, Eagle Ridge, Light Peak) and part @@ -5406,6 +5422,7 @@ static void quirk_no_ext_tags(struct pci_dev *pdev) pci_walk_bus(bridge->bus, pci_configure_extended_tags, NULL); } +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_3WARE, 0x1004, quirk_no_ext_tags); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0132, quirk_no_ext_tags); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0140, quirk_no_ext_tags); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0141, quirk_no_ext_tags); @@ -6068,6 +6085,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2b, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2d, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2f, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a31, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa73f, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa76e, dpc_log_size); #endif /* diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index 0c1faa6c1973..d05a482639e3 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1308,13 +1308,6 @@ static void stdev_release(struct device *dev) { struct switchtec_dev *stdev = to_stdev(dev); - if (stdev->dma_mrpc) { - iowrite32(0, &stdev->mmio_mrpc->dma_en); - flush_wc_buf(stdev); - writeq(0, &stdev->mmio_mrpc->dma_addr); - dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc), - stdev->dma_mrpc, stdev->dma_mrpc_dma_addr); - } kfree(stdev); } @@ -1358,7 +1351,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev) return ERR_PTR(-ENOMEM); stdev->alive = true; - stdev->pdev = pdev; + stdev->pdev = pci_dev_get(pdev); INIT_LIST_HEAD(&stdev->mrpc_queue); mutex_init(&stdev->mrpc_mutex); stdev->mrpc_busy = 0; @@ -1391,6 +1384,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev) return stdev; err_put: + pci_dev_put(stdev->pdev); put_device(&stdev->dev); return ERR_PTR(rc); } @@ -1646,6 +1640,18 @@ static int switchtec_init_pci(struct switchtec_dev *stdev, return 0; } +static void switchtec_exit_pci(struct switchtec_dev *stdev) +{ + if (stdev->dma_mrpc) { + iowrite32(0, &stdev->mmio_mrpc->dma_en); + flush_wc_buf(stdev); + writeq(0, &stdev->mmio_mrpc->dma_addr); + dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc), + stdev->dma_mrpc, stdev->dma_mrpc_dma_addr); + stdev->dma_mrpc = NULL; + } +} + static int switchtec_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -1668,7 +1674,7 @@ static int switchtec_pci_probe(struct pci_dev *pdev, rc = switchtec_init_isr(stdev); if (rc) { dev_err(&stdev->dev, "failed to init isr.\n"); - goto err_put; + goto err_exit_pci; } iowrite32(SWITCHTEC_EVENT_CLEAR | @@ -1689,6 +1695,8 @@ static int switchtec_pci_probe(struct pci_dev *pdev, err_devadd: stdev_kill(stdev); +err_exit_pci: + switchtec_exit_pci(stdev); err_put: ida_free(&switchtec_minor_ida, MINOR(stdev->dev.devt)); put_device(&stdev->dev); @@ -1705,6 +1713,9 @@ static void switchtec_pci_remove(struct pci_dev *pdev) ida_free(&switchtec_minor_ida, MINOR(stdev->dev.devt)); dev_info(&stdev->dev, "unregistered.\n"); stdev_kill(stdev); + switchtec_exit_pci(stdev); + pci_dev_put(stdev->pdev); + stdev->pdev = NULL; put_device(&stdev->dev); } diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 47e7c3206939..899e4ed49905 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -2178,6 +2178,17 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) dev_dbg(cmn->dev, "ignoring external node %llx\n", reg); continue; } + /* + * AmpereOneX erratum AC04_MESH_1 makes some XPs report a bogus + * child count larger than the number of valid child pointers. + * A child offset of 0 can only occur on CMN-600; otherwise it + * would imply the root node being its own grandchild, which + * we can safely dismiss in general. + */ + if (reg == 0 && cmn->part != PART_CMN600) { + dev_dbg(cmn->dev, "bogus child pointer?\n"); + continue; + } arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn); diff --git a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c index e625b32889bf..0928a526e2ab 100644 --- a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c +++ b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c @@ -706,7 +706,7 @@ static int mixel_dphy_probe(struct platform_device *pdev) return ret; } - priv->id = of_alias_get_id(np, "mipi_dphy"); + priv->id = of_alias_get_id(np, "mipi-dphy"); if (priv->id < 0) { dev_err(dev, "Failed to get phy node alias id: %d\n", priv->id); diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index 9de617ca9daa..7e61c6b278a7 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -675,8 +675,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) channel->irq = platform_get_irq_optional(pdev, 0); channel->dr_mode = rcar_gen3_get_dr_mode(dev->of_node); if (channel->dr_mode != USB_DR_MODE_UNKNOWN) { - int ret; - channel->is_otg_channel = true; channel->uses_otg_pins = !of_property_read_bool(dev->of_node, "renesas,no-otg-pins"); @@ -740,8 +738,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) ret = PTR_ERR(provider); goto error; } else if (channel->is_otg_channel) { - int ret; - ret = device_create_file(dev, &dev_attr_role); if (ret < 0) goto error; diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 4d5b4071d47d..dc22b1dd2c8b 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -1518,6 +1518,19 @@ int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, } EXPORT_SYMBOL_GPL(tegra_xusb_padctl_get_usb3_companion); +int tegra_xusb_padctl_get_port_number(struct phy *phy) +{ + struct tegra_xusb_lane *lane; + + if (!phy) + return -ENODEV; + + lane = phy_get_drvdata(phy); + + return lane->index; +} +EXPORT_SYMBOL_GPL(tegra_xusb_padctl_get_port_number); + MODULE_AUTHOR("Thierry Reding "); MODULE_DESCRIPTION("Tegra XUSB Pad Controller driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c index 31a775877f6e..63c45809943f 100644 --- a/drivers/phy/ti/phy-omap-usb2.c +++ b/drivers/phy/ti/phy-omap-usb2.c @@ -116,7 +116,7 @@ static int omap_usb_set_vbus(struct usb_otg *otg, bool enabled) { struct omap_usb *phy = phy_to_omapusb(otg->usb_phy); - if (!phy->comparator) + if (!phy->comparator || !phy->comparator->set_vbus) return -ENODEV; return phy->comparator->set_vbus(phy->comparator, enabled); @@ -126,7 +126,7 @@ static int omap_usb_start_srp(struct usb_otg *otg) { struct omap_usb *phy = phy_to_omapusb(otg->usb_phy); - if (!phy->comparator) + if (!phy->comparator || !phy->comparator->start_srp) return -ENODEV; return phy->comparator->start_srp(phy->comparator); diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8186.c b/drivers/pinctrl/mediatek/pinctrl-mt8186.c index a02f7c326970..09edcf47effe 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8186.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8186.c @@ -1198,7 +1198,6 @@ static const struct mtk_pin_reg_calc mt8186_reg_cals[PINCTRL_PIN_REG_MAX] = { [PINCTRL_PIN_REG_DIR] = MTK_RANGE(mt8186_pin_dir_range), [PINCTRL_PIN_REG_DI] = MTK_RANGE(mt8186_pin_di_range), [PINCTRL_PIN_REG_DO] = MTK_RANGE(mt8186_pin_do_range), - [PINCTRL_PIN_REG_SR] = MTK_RANGE(mt8186_pin_dir_range), [PINCTRL_PIN_REG_SMT] = MTK_RANGE(mt8186_pin_smt_range), [PINCTRL_PIN_REG_IES] = MTK_RANGE(mt8186_pin_ies_range), [PINCTRL_PIN_REG_PU] = MTK_RANGE(mt8186_pin_pu_range), diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8192.c b/drivers/pinctrl/mediatek/pinctrl-mt8192.c index 9695f4ec6aba..f120268c00f5 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8192.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8192.c @@ -1379,7 +1379,6 @@ static const struct mtk_pin_reg_calc mt8192_reg_cals[PINCTRL_PIN_REG_MAX] = { [PINCTRL_PIN_REG_DIR] = MTK_RANGE(mt8192_pin_dir_range), [PINCTRL_PIN_REG_DI] = MTK_RANGE(mt8192_pin_di_range), [PINCTRL_PIN_REG_DO] = MTK_RANGE(mt8192_pin_do_range), - [PINCTRL_PIN_REG_SR] = MTK_RANGE(mt8192_pin_dir_range), [PINCTRL_PIN_REG_SMT] = MTK_RANGE(mt8192_pin_smt_range), [PINCTRL_PIN_REG_IES] = MTK_RANGE(mt8192_pin_ies_range), [PINCTRL_PIN_REG_PU] = MTK_RANGE(mt8192_pin_pu_range), diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index f7d02513d8cc..e79037dc8579 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -1571,8 +1571,10 @@ static int nmk_pmx_set(struct pinctrl_dev *pctldev, unsigned function, * Then mask the pins that need to be sleeping now when we're * switching to the ALT C function. */ - for (i = 0; i < g->grp.npins; i++) - slpm[g->grp.pins[i] / NMK_GPIO_PER_CHIP] &= ~BIT(g->grp.pins[i]); + for (i = 0; i < g->grp.npins; i++) { + unsigned int bit = g->grp.pins[i] % NMK_GPIO_PER_CHIP; + slpm[g->grp.pins[i] / NMK_GPIO_PER_CHIP] &= ~BIT(bit); + } nmk_gpio_glitch_slpm_init(slpm); } diff --git a/drivers/pinctrl/renesas/pfc-r8a779g0.c b/drivers/pinctrl/renesas/pfc-r8a779g0.c index 43a63a21a6fb..acf7664ea835 100644 --- a/drivers/pinctrl/renesas/pfc-r8a779g0.c +++ b/drivers/pinctrl/renesas/pfc-r8a779g0.c @@ -2360,6 +2360,30 @@ static const unsigned int scif_clk_mux[] = { SCIF_CLK_MARK, }; +static const unsigned int scif_clk2_pins[] = { + /* SCIF_CLK2 */ + RCAR_GP_PIN(8, 11), +}; +static const unsigned int scif_clk2_mux[] = { + SCIF_CLK2_MARK, +}; + +/* - SSI ------------------------------------------------- */ +static const unsigned int ssi_data_pins[] = { + /* SSI_SD */ + RCAR_GP_PIN(1, 20), +}; +static const unsigned int ssi_data_mux[] = { + SSI_SD_MARK, +}; +static const unsigned int ssi_ctrl_pins[] = { + /* SSI_SCK, SSI_WS */ + RCAR_GP_PIN(1, 18), RCAR_GP_PIN(1, 19), +}; +static const unsigned int ssi_ctrl_mux[] = { + SSI_SCK_MARK, SSI_WS_MARK, +}; + /* - TPU ------------------------------------------------------------------- */ static const unsigned int tpu_to0_pins[] = { /* TPU0TO0 */ @@ -2651,6 +2675,10 @@ static const struct sh_pfc_pin_group pinmux_groups[] = { SH_PFC_PIN_GROUP(scif4_clk), SH_PFC_PIN_GROUP(scif4_ctrl), SH_PFC_PIN_GROUP(scif_clk), + SH_PFC_PIN_GROUP(scif_clk2), + + SH_PFC_PIN_GROUP(ssi_data), + SH_PFC_PIN_GROUP(ssi_ctrl), SH_PFC_PIN_GROUP(tpu_to0), /* suffix might be updated */ SH_PFC_PIN_GROUP(tpu_to0_a), /* suffix might be updated */ @@ -2964,6 +2992,15 @@ static const char * const scif_clk_groups[] = { "scif_clk", }; +static const char * const scif_clk2_groups[] = { + "scif_clk2", +}; + +static const char * const ssi_groups[] = { + "ssi_data", + "ssi_ctrl", +}; + static const char * const tpu_groups[] = { /* suffix might be updated */ "tpu_to0", @@ -3044,6 +3081,9 @@ static const struct sh_pfc_function pinmux_functions[] = { SH_PFC_FUNCTION(scif3), SH_PFC_FUNCTION(scif4), SH_PFC_FUNCTION(scif_clk), + SH_PFC_FUNCTION(scif_clk2), + + SH_PFC_FUNCTION(ssi), SH_PFC_FUNCTION(tpu), diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c index 9b12fe8e95c9..dd2e654daf4b 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c @@ -74,30 +74,30 @@ static ssize_t show_perf_status_freq_khz(struct uncore_data *data, char *buf) } #define store_uncore_min_max(name, min_max) \ - static ssize_t store_##name(struct device *dev, \ - struct device_attribute *attr, \ + static ssize_t store_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, \ const char *buf, size_t count) \ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return store_min_max_freq_khz(data, buf, count, \ min_max); \ } #define show_uncore_min_max(name, min_max) \ - static ssize_t show_##name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return show_min_max_freq_khz(data, buf, min_max); \ } #define show_uncore_perf_status(name) \ - static ssize_t show_##name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return show_perf_status_freq_khz(data, buf); \ } @@ -111,11 +111,11 @@ show_uncore_min_max(max_freq_khz, 1); show_uncore_perf_status(current_freq_khz); #define show_uncore_data(member_name) \ - static ssize_t show_##member_name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##member_name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ struct uncore_data *data = container_of(attr, struct uncore_data,\ - member_name##_dev_attr);\ + member_name##_kobj_attr);\ \ return sysfs_emit(buf, "%u\n", \ data->member_name); \ @@ -126,29 +126,29 @@ show_uncore_data(initial_max_freq_khz); #define init_attribute_rw(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = store_##_name; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0644; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = store_##_name; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0644; \ } while (0) #define init_attribute_ro(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = NULL; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0444; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = NULL; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0444; \ } while (0) #define init_attribute_root_ro(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = NULL; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0400; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = NULL; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0400; \ } while (0) static int create_attr_group(struct uncore_data *data, char *name) @@ -161,14 +161,14 @@ static int create_attr_group(struct uncore_data *data, char *name) init_attribute_ro(initial_max_freq_khz); init_attribute_root_ro(current_freq_khz); - data->uncore_attrs[index++] = &data->max_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->min_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->initial_min_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->initial_max_freq_khz_dev_attr.attr; + data->uncore_attrs[index++] = &data->max_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->min_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->initial_min_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->initial_max_freq_khz_kobj_attr.attr; ret = uncore_read_freq(data, &freq); if (!ret) - data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr; + data->uncore_attrs[index++] = &data->current_freq_khz_kobj_attr.attr; data->uncore_attrs[index] = NULL; diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h index f5dcfa2fb285..2d9dc3151d6e 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h @@ -23,11 +23,11 @@ * @die_id: Die id for this instance * @name: Sysfs entry name for this instance * @uncore_attr_group: Attribute group storage - * @max_freq_khz_dev_attr: Storage for device attribute max_freq_khz - * @mix_freq_khz_dev_attr: Storage for device attribute min_freq_khz - * @initial_max_freq_khz_dev_attr: Storage for device attribute initial_max_freq_khz - * @initial_min_freq_khz_dev_attr: Storage for device attribute initial_min_freq_khz - * @current_freq_khz_dev_attr: Storage for device attribute current_freq_khz + * @max_freq_khz_dev_attr: Storage for kobject attribute max_freq_khz + * @mix_freq_khz_dev_attr: Storage for kobject attribute min_freq_khz + * @initial_max_freq_khz_dev_attr: Storage for kobject attribute initial_max_freq_khz + * @initial_min_freq_khz_dev_attr: Storage for kobject attribute initial_min_freq_khz + * @current_freq_khz_dev_attr: Storage for kobject attribute current_freq_khz * @uncore_attrs: Attribute storage for group creation * * This structure is used to encapsulate all data related to uncore sysfs @@ -44,11 +44,11 @@ struct uncore_data { char name[32]; struct attribute_group uncore_attr_group; - struct device_attribute max_freq_khz_dev_attr; - struct device_attribute min_freq_khz_dev_attr; - struct device_attribute initial_max_freq_khz_dev_attr; - struct device_attribute initial_min_freq_khz_dev_attr; - struct device_attribute current_freq_khz_dev_attr; + struct kobj_attribute max_freq_khz_kobj_attr; + struct kobj_attribute min_freq_khz_kobj_attr; + struct kobj_attribute initial_max_freq_khz_kobj_attr; + struct kobj_attribute initial_min_freq_khz_kobj_attr; + struct kobj_attribute current_freq_khz_kobj_attr; struct attribute *uncore_attrs[6]; }; diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 8e2b07ed2ce9..c10c99a31a90 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -200,9 +200,6 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE); sparse_keymap_report_event(input_dev, event, val, autorelease); - - /* Some devices need this to report further events */ - acpi_evaluate_object(handle, "VBDL", NULL, NULL); } /* diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 1cf2471d54dd..a64f56ddd4a4 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -20,12 +20,29 @@ #define P2SBC_HIDE BIT(8) #define P2SB_DEVFN_DEFAULT PCI_DEVFN(31, 1) +#define P2SB_DEVFN_GOLDMONT PCI_DEVFN(13, 0) +#define SPI_DEVFN_GOLDMONT PCI_DEVFN(13, 2) static const struct x86_cpu_id p2sb_cpu_ids[] = { - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, PCI_DEVFN(13, 0)), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, P2SB_DEVFN_GOLDMONT), {} }; +/* + * Cache BAR0 of P2SB device functions 0 to 7. + * TODO: The constant 8 is the number of functions that PCI specification + * defines. Same definitions exist tree-wide. Unify this definition and + * the other definitions then move to include/uapi/linux/pci.h. + */ +#define NR_P2SB_RES_CACHE 8 + +struct p2sb_res_cache { + u32 bus_dev_id; + struct resource res; +}; + +static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE]; + static int p2sb_get_devfn(unsigned int *devfn) { unsigned int fn = P2SB_DEVFN_DEFAULT; @@ -39,8 +56,16 @@ static int p2sb_get_devfn(unsigned int *devfn) return 0; } +static bool p2sb_valid_resource(struct resource *res) +{ + if (res->flags) + return true; + + return false; +} + /* Copy resource from the first BAR of the device in question */ -static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) +static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) { struct resource *bar0 = &pdev->resource[0]; @@ -56,22 +81,99 @@ static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) mem->end = bar0->end; mem->flags = bar0->flags; mem->desc = bar0->desc; +} + +static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) +{ + struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)]; + struct pci_dev *pdev; + + pdev = pci_scan_single_device(bus, devfn); + if (!pdev) + return; + + p2sb_read_bar0(pdev, &cache->res); + cache->bus_dev_id = bus->dev.id; + + pci_stop_and_remove_bus_device(pdev); +} + +static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) +{ + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); + + /* On Goldmont p2sb_bar() also gets called for the SPI controller */ + if (devfn == P2SB_DEVFN_GOLDMONT) + p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT); + + if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) + return -ENOENT; return 0; } -static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +static struct pci_bus *p2sb_get_bus(struct pci_bus *bus) { - struct pci_dev *pdev; + static struct pci_bus *p2sb_bus; + + bus = bus ?: p2sb_bus; + if (bus) + return bus; + + /* Assume P2SB is on the bus 0 in domain 0 */ + p2sb_bus = pci_find_bus(0, 0); + return p2sb_bus; +} + +static int p2sb_cache_resources(void) +{ + unsigned int devfn_p2sb; + u32 value = P2SBC_HIDE; + struct pci_bus *bus; + u16 class; int ret; - pdev = pci_scan_single_device(bus, devfn); - if (!pdev) + /* Get devfn for P2SB device itself */ + ret = p2sb_get_devfn(&devfn_p2sb); + if (ret) + return ret; + + bus = p2sb_get_bus(NULL); + if (!bus) return -ENODEV; - ret = p2sb_read_bar0(pdev, mem); + /* + * When a device with same devfn exists and its device class is not + * PCI_CLASS_MEMORY_OTHER for P2SB, do not touch it. + */ + pci_bus_read_config_word(bus, devfn_p2sb, PCI_CLASS_DEVICE, &class); + if (!PCI_POSSIBLE_ERROR(class) && class != PCI_CLASS_MEMORY_OTHER) + return -ENODEV; + + /* + * Prevent concurrent PCI bus scan from seeing the P2SB device and + * removing via sysfs while it is temporarily exposed. + */ + pci_lock_rescan_remove(); + + /* + * The BIOS prevents the P2SB device from being enumerated by the PCI + * subsystem, so we need to unhide and hide it back to lookup the BAR. + * Unhide the P2SB device here, if needed. + */ + pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); + if (value & P2SBC_HIDE) + pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); + + ret = p2sb_scan_and_cache(bus, devfn_p2sb); + + /* Hide the P2SB device, if it was hidden */ + if (value & P2SBC_HIDE) + pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE); + + pci_unlock_rescan_remove(); - pci_stop_and_remove_bus_device(pdev); return ret; } @@ -81,64 +183,53 @@ static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct re * @devfn: PCI slot and function to communicate with * @mem: memory resource to be filled in * - * The BIOS prevents the P2SB device from being enumerated by the PCI - * subsystem, so we need to unhide and hide it back to lookup the BAR. - * - * if @bus is NULL, the bus 0 in domain 0 will be used. + * If @bus is NULL, the bus 0 in domain 0 will be used. * If @devfn is 0, it will be replaced by devfn of the P2SB device. * * Caller must provide a valid pointer to @mem. * - * Locking is handled by pci_rescan_remove_lock mutex. - * * Return: * 0 on success or appropriate errno value on error. */ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) { - struct pci_dev *pdev_p2sb; - unsigned int devfn_p2sb; - u32 value = P2SBC_HIDE; + struct p2sb_res_cache *cache; int ret; - /* Get devfn for P2SB device itself */ - ret = p2sb_get_devfn(&devfn_p2sb); - if (ret) - return ret; - - /* if @bus is NULL, use bus 0 in domain 0 */ - bus = bus ?: pci_find_bus(0, 0); - - /* - * Prevent concurrent PCI bus scan from seeing the P2SB device and - * removing via sysfs while it is temporarily exposed. - */ - pci_lock_rescan_remove(); - - /* Unhide the P2SB device, if needed */ - pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); - if (value & P2SBC_HIDE) - pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); - - pdev_p2sb = pci_scan_single_device(bus, devfn_p2sb); - if (devfn) - ret = p2sb_scan_and_read(bus, devfn, mem); - else - ret = p2sb_read_bar0(pdev_p2sb, mem); - pci_stop_and_remove_bus_device(pdev_p2sb); - - /* Hide the P2SB device, if it was hidden */ - if (value & P2SBC_HIDE) - pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE); - - pci_unlock_rescan_remove(); - - if (ret) - return ret; - - if (mem->flags == 0) + bus = p2sb_get_bus(bus); + if (!bus) return -ENODEV; + if (!devfn) { + ret = p2sb_get_devfn(&devfn); + if (ret) + return ret; + } + + cache = &p2sb_resources[PCI_FUNC(devfn)]; + if (cache->bus_dev_id != bus->dev.id) + return -ENODEV; + + if (!p2sb_valid_resource(&cache->res)) + return -ENOENT; + + memcpy(mem, &cache->res, sizeof(*mem)); return 0; } EXPORT_SYMBOL_GPL(p2sb_bar); + +static int __init p2sb_fs_init(void) +{ + p2sb_cache_resources(); + return 0; +} + +/* + * pci_rescan_remove_lock to avoid access to unhidden P2SB devices can + * not be locked in sysfs pci bus rescan path because of deadlock. To + * avoid the deadlock, access to P2SB devices with the lock at an early + * step in kernel initialization and cache required resources. This + * should happen after subsys_initcall which initializes PCI subsystem + * and before device_initcall which requires P2SB resources. + */ +fs_initcall(p2sb_fs_init); diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 6edd2e294750..c2fb19af1070 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10511,6 +10511,7 @@ static int convert_dytc_to_profile(int funcmode, int dytcmode, return 0; default: /* Unknown function */ + pr_debug("unknown function 0x%x\n", funcmode); return -EOPNOTSUPP; } return 0; @@ -10696,8 +10697,8 @@ static void dytc_profile_refresh(void) return; perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF; - convert_dytc_to_profile(funcmode, perfmode, &profile); - if (profile != dytc_current_profile) { + err = convert_dytc_to_profile(funcmode, perfmode, &profile); + if (!err && profile != dytc_current_profile) { dytc_current_profile = profile; platform_profile_notify(); } diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 9a92d515abb9..11d72a353355 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -50,7 +50,7 @@ static const struct property_entry chuwi_hi8_air_props[] = { }; static const struct ts_dmi_data chuwi_hi8_air_data = { - .acpi_name = "MSSL1680:00", + .acpi_name = "MSSL1680", .properties = chuwi_hi8_air_props, }; @@ -913,6 +913,32 @@ static const struct ts_dmi_data teclast_tbook11_data = { .properties = teclast_tbook11_props, }; +static const struct property_entry teclast_x16_plus_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 8), + PROPERTY_ENTRY_U32("touchscreen-min-y", 14), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1916), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1264), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-teclast-x16-plus.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + { } +}; + +static const struct ts_dmi_data teclast_x16_plus_data = { + .embedded_fw = { + .name = "silead/gsl3692-teclast-x16-plus.fw", + .prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 }, + .length = 43560, + .sha256 = { 0x9d, 0xb0, 0x3d, 0xf1, 0x00, 0x3c, 0xb5, 0x25, + 0x62, 0x8a, 0xa0, 0x93, 0x4b, 0xe0, 0x4e, 0x75, + 0xd1, 0x27, 0xb1, 0x65, 0x3c, 0xba, 0xa5, 0x0f, + 0xcd, 0xb4, 0xbe, 0x00, 0xbb, 0xf6, 0x43, 0x29 }, + }, + .acpi_name = "MSSL1680:00", + .properties = teclast_x16_plus_props, +}; + static const struct property_entry teclast_x3_plus_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1980), PROPERTY_ENTRY_U32("touchscreen-size-y", 1500), @@ -1567,6 +1593,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_SKU, "E5A6_A1"), }, }, + { + /* Teclast X16 Plus */ + .driver_data = (void *)&teclast_x16_plus_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"), + DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), + DMI_MATCH(DMI_PRODUCT_SKU, "D3A5_A1"), + }, + }, { /* Teclast X3 Plus */ .driver_data = (void *)&teclast_x3_plus_data, @@ -1741,7 +1776,7 @@ static void ts_dmi_add_props(struct i2c_client *client) int error; if (has_acpi_companion(dev) && - !strncmp(ts_data->acpi_name, client->name, I2C_NAME_SIZE)) { + strstarts(client->name, ts_data->acpi_name)) { error = device_create_managed_software_node(dev, ts_data->properties, NULL); if (error) dev_err(dev, "failed to add properties: %d\n", error); diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 4f05f610391b..c02ce0834c2c 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -151,13 +151,13 @@ static int vendor_resource_matches(struct pnp_dev *dev, static void pnpacpi_parse_allocated_vendor(struct pnp_dev *dev, struct acpi_resource_vendor_typed *vendor) { - if (vendor_resource_matches(dev, vendor, &hp_ccsr_uuid, 16)) { - u64 start, length; + struct { u64 start, length; } range; - memcpy(&start, vendor->byte_data, sizeof(start)); - memcpy(&length, vendor->byte_data + 8, sizeof(length)); - - pnp_add_mem_resource(dev, start, start + length - 1, 0); + if (vendor_resource_matches(dev, vendor, &hp_ccsr_uuid, + sizeof(range))) { + memcpy(&range, vendor->byte_data, sizeof(range)); + pnp_add_mem_resource(dev, range.start, range.start + + range.length - 1, 0); } } diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index 0713a52a2510..17b37354e32c 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -209,7 +209,9 @@ static void bq27xxx_battery_i2c_remove(struct i2c_client *client) { struct bq27xxx_device_info *di = i2c_get_clientdata(client); - free_irq(client->irq, di); + if (client->irq) + free_irq(client->irq, di); + bq27xxx_battery_teardown(di); mutex_lock(&battery_mutex); diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 9193c3b8edeb..ae7ee611978b 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -219,7 +219,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent) ret = freq_qos_add_request(&policy->constraints, &dtpm_cpu->qos_req, FREQ_QOS_MAX, pd->table[pd->nr_perf_states - 1].frequency); - if (ret) + if (ret < 0) goto out_dtpm_unregister; cpufreq_cpu_put(policy); diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index a43b2babc809..3e9c94a8d7f7 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -38,11 +38,11 @@ static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip) return container_of(chip, struct atmel_hlcdc_pwm, chip); } -static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, +static int atmel_hlcdc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { - struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c); - struct atmel_hlcdc *hlcdc = chip->hlcdc; + struct atmel_hlcdc_pwm *atmel = to_atmel_hlcdc_pwm(chip); + struct atmel_hlcdc *hlcdc = atmel->hlcdc; unsigned int status; int ret; @@ -54,7 +54,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, u32 pwmcfg; int pres; - if (!chip->errata || !chip->errata->slow_clk_erratum) { + if (!atmel->errata || !atmel->errata->slow_clk_erratum) { clk_freq = clk_get_rate(new_clk); if (!clk_freq) return -EINVAL; @@ -64,7 +64,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, } /* Errata: cannot use slow clk on some IP revisions */ - if ((chip->errata && chip->errata->slow_clk_erratum) || + if ((atmel->errata && atmel->errata->slow_clk_erratum) || clk_period_ns > state->period) { new_clk = hlcdc->sys_clk; clk_freq = clk_get_rate(new_clk); @@ -77,8 +77,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, for (pres = 0; pres <= ATMEL_HLCDC_PWMPS_MAX; pres++) { /* Errata: cannot divide by 1 on some IP revisions */ - if (!pres && chip->errata && - chip->errata->div1_clk_erratum) + if (!pres && atmel->errata && + atmel->errata->div1_clk_erratum) continue; if ((clk_period_ns << pres) >= state->period) @@ -90,7 +90,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, pwmcfg = ATMEL_HLCDC_PWMPS(pres); - if (new_clk != chip->cur_clk) { + if (new_clk != atmel->cur_clk) { u32 gencfg = 0; int ret; @@ -98,8 +98,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, if (ret) return ret; - clk_disable_unprepare(chip->cur_clk); - chip->cur_clk = new_clk; + clk_disable_unprepare(atmel->cur_clk); + atmel->cur_clk = new_clk; if (new_clk == hlcdc->sys_clk) gencfg = ATMEL_HLCDC_CLKPWMSEL; @@ -160,8 +160,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, if (ret) return ret; - clk_disable_unprepare(chip->cur_clk); - chip->cur_clk = NULL; + clk_disable_unprepare(atmel->cur_clk); + atmel->cur_clk = NULL; } return 0; @@ -183,31 +183,32 @@ static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_sama5d3_errata = { #ifdef CONFIG_PM_SLEEP static int atmel_hlcdc_pwm_suspend(struct device *dev) { - struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev); + struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev); /* Keep the periph clock enabled if the PWM is still running. */ - if (pwm_is_enabled(&chip->chip.pwms[0])) - clk_disable_unprepare(chip->hlcdc->periph_clk); + if (!pwm_is_enabled(&atmel->chip.pwms[0])) + clk_disable_unprepare(atmel->hlcdc->periph_clk); return 0; } static int atmel_hlcdc_pwm_resume(struct device *dev) { - struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev); + struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev); struct pwm_state state; int ret; - pwm_get_state(&chip->chip.pwms[0], &state); + pwm_get_state(&atmel->chip.pwms[0], &state); /* Re-enable the periph clock it was stopped during suspend. */ if (!state.enabled) { - ret = clk_prepare_enable(chip->hlcdc->periph_clk); + ret = clk_prepare_enable(atmel->hlcdc->periph_clk); if (ret) return ret; } - return atmel_hlcdc_pwm_apply(&chip->chip, &chip->chip.pwms[0], &state); + return atmel_hlcdc_pwm_apply(&atmel->chip, &atmel->chip.pwms[0], + &state); } #endif @@ -244,14 +245,14 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) { const struct of_device_id *match; struct device *dev = &pdev->dev; - struct atmel_hlcdc_pwm *chip; + struct atmel_hlcdc_pwm *atmel; struct atmel_hlcdc *hlcdc; int ret; hlcdc = dev_get_drvdata(dev->parent); - chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); - if (!chip) + atmel = devm_kzalloc(dev, sizeof(*atmel), GFP_KERNEL); + if (!atmel) return -ENOMEM; ret = clk_prepare_enable(hlcdc->periph_clk); @@ -260,33 +261,31 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) match = of_match_node(atmel_hlcdc_dt_ids, dev->parent->of_node); if (match) - chip->errata = match->data; + atmel->errata = match->data; - chip->hlcdc = hlcdc; - chip->chip.ops = &atmel_hlcdc_pwm_ops; - chip->chip.dev = dev; - chip->chip.npwm = 1; + atmel->hlcdc = hlcdc; + atmel->chip.ops = &atmel_hlcdc_pwm_ops; + atmel->chip.dev = dev; + atmel->chip.npwm = 1; - ret = pwmchip_add(&chip->chip); + ret = pwmchip_add(&atmel->chip); if (ret) { clk_disable_unprepare(hlcdc->periph_clk); return ret; } - platform_set_drvdata(pdev, chip); + platform_set_drvdata(pdev, atmel); return 0; } -static int atmel_hlcdc_pwm_remove(struct platform_device *pdev) +static void atmel_hlcdc_pwm_remove(struct platform_device *pdev) { - struct atmel_hlcdc_pwm *chip = platform_get_drvdata(pdev); + struct atmel_hlcdc_pwm *atmel = platform_get_drvdata(pdev); - pwmchip_remove(&chip->chip); + pwmchip_remove(&atmel->chip); - clk_disable_unprepare(chip->hlcdc->periph_clk); - - return 0; + clk_disable_unprepare(atmel->hlcdc->periph_clk); } static const struct of_device_id atmel_hlcdc_pwm_dt_ids[] = { @@ -301,7 +300,7 @@ static struct platform_driver atmel_hlcdc_pwm_driver = { .pm = &atmel_hlcdc_pwm_pm_ops, }, .probe = atmel_hlcdc_pwm_probe, - .remove = atmel_hlcdc_pwm_remove, + .remove_new = atmel_hlcdc_pwm_remove, }; module_platform_driver(atmel_hlcdc_pwm_driver); diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c index 0fccf061ab95..8ce6c453adf0 100644 --- a/drivers/pwm/pwm-img.c +++ b/drivers/pwm/pwm-img.c @@ -289,9 +289,9 @@ static int img_pwm_probe(struct platform_device *pdev) return PTR_ERR(imgchip->sys_clk); } - imgchip->pwm_clk = devm_clk_get(&pdev->dev, "imgchip"); + imgchip->pwm_clk = devm_clk_get(&pdev->dev, "pwm"); if (IS_ERR(imgchip->pwm_clk)) { - dev_err(&pdev->dev, "failed to get imgchip clock\n"); + dev_err(&pdev->dev, "failed to get pwm clock\n"); return PTR_ERR(imgchip->pwm_clk); } diff --git a/drivers/pwm/pwm-sti.c b/drivers/pwm/pwm-sti.c index 652fdb8dc7bf..0a7920cbd494 100644 --- a/drivers/pwm/pwm-sti.c +++ b/drivers/pwm/pwm-sti.c @@ -395,8 +395,17 @@ out: static int sti_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { + struct sti_pwm_chip *pc = to_sti_pwmchip(chip); + struct sti_pwm_compat_data *cdata = pc->cdata; + struct device *dev = pc->dev; int err; + if (pwm->hwpwm >= cdata->pwm_num_devs) { + dev_err(dev, "device %u is not valid for pwm mode\n", + pwm->hwpwm); + return -EINVAL; + } + if (state->polarity != PWM_POLARITY_NORMAL) return -EINVAL; @@ -647,7 +656,7 @@ static int sti_pwm_probe(struct platform_device *pdev) pc->chip.dev = dev; pc->chip.ops = &sti_pwm_ops; - pc->chip.npwm = pc->cdata->pwm_num_devs; + pc->chip.npwm = max(cdata->pwm_num_devs, cdata->cpt_num_devs); for (i = 0; i < cdata->cpt_num_devs; i++) { struct sti_cpt_ddata *ddata = &cdata->ddata[i]; diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 723c70a354a7..4227aba54bb7 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2933,7 +2933,8 @@ static int _regulator_enable(struct regulator *regulator) /* Fallthrough on positive return values - already enabled */ } - rdev->use_count++; + if (regulator->enable_count == 1) + rdev->use_count++; return 0; @@ -3008,37 +3009,40 @@ static int _regulator_disable(struct regulator *regulator) lockdep_assert_held_once(&rdev->mutex.base); - if (WARN(rdev->use_count <= 0, + if (WARN(regulator->enable_count == 0, "unbalanced disables for %s\n", rdev_get_name(rdev))) return -EIO; - /* are we the last user and permitted to disable ? */ - if (rdev->use_count == 1 && - (rdev->constraints && !rdev->constraints->always_on)) { + if (regulator->enable_count == 1) { + /* disabling last enable_count from this regulator */ + /* are we the last user and permitted to disable ? */ + if (rdev->use_count == 1 && + (rdev->constraints && !rdev->constraints->always_on)) { - /* we are last user */ - if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) { - ret = _notifier_call_chain(rdev, - REGULATOR_EVENT_PRE_DISABLE, - NULL); - if (ret & NOTIFY_STOP_MASK) - return -EINVAL; + /* we are last user */ + if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) { + ret = _notifier_call_chain(rdev, + REGULATOR_EVENT_PRE_DISABLE, + NULL); + if (ret & NOTIFY_STOP_MASK) + return -EINVAL; - ret = _regulator_do_disable(rdev); - if (ret < 0) { - rdev_err(rdev, "failed to disable: %pe\n", ERR_PTR(ret)); - _notifier_call_chain(rdev, - REGULATOR_EVENT_ABORT_DISABLE, + ret = _regulator_do_disable(rdev); + if (ret < 0) { + rdev_err(rdev, "failed to disable: %pe\n", ERR_PTR(ret)); + _notifier_call_chain(rdev, + REGULATOR_EVENT_ABORT_DISABLE, + NULL); + return ret; + } + _notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE, NULL); - return ret; } - _notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE, - NULL); - } - rdev->use_count = 0; - } else if (rdev->use_count > 1) { - rdev->use_count--; + rdev->use_count = 0; + } else if (rdev->use_count > 1) { + rdev->use_count--; + } } if (ret == 0) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index a92801e213d6..cfae5de4dfcb 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -169,6 +169,9 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev) pwm_get_state(drvdata->pwm, &pstate); voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); + if (voltage < min(max_uV_duty, min_uV_duty) || + voltage > max(max_uV_duty, min_uV_duty)) + return -ENOTRECOVERABLE; /* * The dutycycle for min_uV might be greater than the one for max_uV. diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c index 115345e9fded..82af27b0e469 100644 --- a/drivers/regulator/ti-abb-regulator.c +++ b/drivers/regulator/ti-abb-regulator.c @@ -734,9 +734,25 @@ static int ti_abb_probe(struct platform_device *pdev) return PTR_ERR(abb->setup_reg); } - abb->int_base = devm_platform_ioremap_resource_byname(pdev, "int-address"); - if (IS_ERR(abb->int_base)) - return PTR_ERR(abb->int_base); + pname = "int-address"; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, pname); + if (!res) { + dev_err(dev, "Missing '%s' IO resource\n", pname); + return -ENODEV; + } + /* + * The MPU interrupt status register (PRM_IRQSTATUS_MPU) is + * shared between regulator-abb-{ivahd,dspeve,gpu} driver + * instances. Therefore use devm_ioremap() rather than + * devm_platform_ioremap_resource_byname() to avoid busy + * resource region conflicts. + */ + abb->int_base = devm_ioremap(dev, res->start, + resource_size(res)); + if (!abb->int_base) { + dev_err(dev, "Unable to map '%s'\n", pname); + return -ENOMEM; + } /* Map Optional resources */ pname = "efuse-address"; diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 166019786653..d93113b6ffaa 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -313,7 +313,7 @@ config ST_SLIM_REMOTEPROC config STM32_RPROC tristate "STM32 remoteproc support" - depends on ARCH_STM32 + depends on ARCH_STM32 || COMPILE_TEST depends on REMOTEPROC select MAILBOX help diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 0e95525c1158..ab5e4f02ab22 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -351,6 +351,9 @@ static void rproc_virtio_dev_release(struct device *dev) kfree(vdev); + of_reserved_mem_device_release(&rvdev->pdev->dev); + dma_release_coherent_memory(&rvdev->pdev->dev); + put_device(&rvdev->pdev->dev); } @@ -584,9 +587,6 @@ static int rproc_virtio_remove(struct platform_device *pdev) rproc_remove_subdev(rproc, &rvdev->subdev); rproc_remove_rvdev(rvdev); - of_reserved_mem_device_release(&pdev->dev); - dma_release_coherent_memory(&pdev->dev); - put_device(&rproc->dev); return 0; diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index 8746cbb1f168..74da0393172c 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -118,10 +118,10 @@ static int stm32_rproc_mem_alloc(struct rproc *rproc, struct device *dev = rproc->dev.parent; void *va; - dev_dbg(dev, "map memory: %pa+%x\n", &mem->dma, mem->len); - va = ioremap_wc(mem->dma, mem->len); + dev_dbg(dev, "map memory: %pad+%zx\n", &mem->dma, mem->len); + va = (__force void *)ioremap_wc(mem->dma, mem->len); if (IS_ERR_OR_NULL(va)) { - dev_err(dev, "Unable to map memory region: %pa+%x\n", + dev_err(dev, "Unable to map memory region: %pad+0x%zx\n", &mem->dma, mem->len); return -ENOMEM; } @@ -136,7 +136,7 @@ static int stm32_rproc_mem_release(struct rproc *rproc, struct rproc_mem_entry *mem) { dev_dbg(rproc->dev.parent, "unmap memory: %pa\n", &mem->dma); - iounmap(mem->va); + iounmap((__force __iomem void *)mem->va); return 0; } @@ -627,7 +627,7 @@ stm32_rproc_get_loaded_rsc_table(struct rproc *rproc, size_t *table_sz) ddata->rsc_va = devm_ioremap_wc(dev, rsc_pa, RSC_TBL_SIZE); if (IS_ERR_OR_NULL(ddata->rsc_va)) { - dev_err(dev, "Unable to map memory region: %pa+%zx\n", + dev_err(dev, "Unable to map memory region: %pa+%x\n", &rsc_pa, RSC_TBL_SIZE); ddata->rsc_va = NULL; return ERR_PTR(-ENOMEM); @@ -641,7 +641,7 @@ done: * entire area by overwriting it with the initial values stored in rproc->clean_table. */ *table_sz = RSC_TBL_SIZE; - return (struct resource_table *)ddata->rsc_va; + return (__force struct resource_table *)ddata->rsc_va; } static const struct rproc_ops st_rproc_ops = { @@ -889,7 +889,7 @@ static int stm32_rproc_remove(struct platform_device *pdev) return 0; } -static int __maybe_unused stm32_rproc_suspend(struct device *dev) +static int stm32_rproc_suspend(struct device *dev) { struct rproc *rproc = dev_get_drvdata(dev); struct stm32_rproc *ddata = rproc->priv; @@ -900,7 +900,7 @@ static int __maybe_unused stm32_rproc_suspend(struct device *dev) return 0; } -static int __maybe_unused stm32_rproc_resume(struct device *dev) +static int stm32_rproc_resume(struct device *dev) { struct rproc *rproc = dev_get_drvdata(dev); struct stm32_rproc *ddata = rproc->priv; @@ -911,16 +911,16 @@ static int __maybe_unused stm32_rproc_resume(struct device *dev) return 0; } -static SIMPLE_DEV_PM_OPS(stm32_rproc_pm_ops, - stm32_rproc_suspend, stm32_rproc_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(stm32_rproc_pm_ops, + stm32_rproc_suspend, stm32_rproc_resume); static struct platform_driver stm32_rproc_driver = { .probe = stm32_rproc_probe, .remove = stm32_rproc_remove, .driver = { .name = "stm32-rproc", - .pm = &stm32_rproc_pm_ops, - .of_match_table = of_match_ptr(stm32_rproc_match), + .pm = pm_ptr(&stm32_rproc_pm_ops), + .of_match_table = stm32_rproc_match, }, }; module_platform_driver(stm32_rproc_driver); diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 905ac7910c98..f1af0f674615 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -378,6 +378,7 @@ static void virtio_rpmsg_release_device(struct device *dev) struct rpmsg_device *rpdev = to_rpmsg_device(dev); struct virtio_rpmsg_channel *vch = to_virtio_rpmsg_channel(rpdev); + kfree(rpdev->driver_override); kfree(vch); } diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index a3f066673c48..fc52d8bd8f08 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1863,7 +1863,8 @@ config RTC_DRV_MT2712 config RTC_DRV_MT6397 tristate "MediaTek PMIC based RTC" - depends on MFD_MT6397 || (COMPILE_TEST && IRQ_DOMAIN) + depends on MFD_MT6397 || COMPILE_TEST + select IRQ_DOMAIN help This selects the MediaTek(R) RTC driver. RTC is part of MediaTek MT6397 PMIC. You should enable MT6397 PMIC MFD before select diff --git a/drivers/rtc/lib_test.c b/drivers/rtc/lib_test.c index d5caf36c56cd..225c859d6da5 100644 --- a/drivers/rtc/lib_test.c +++ b/drivers/rtc/lib_test.c @@ -54,7 +54,7 @@ static void rtc_time64_to_tm_test_date_range(struct kunit *test) days = div_s64(secs, 86400); - #define FAIL_MSG "%d/%02d/%02d (%2d) : %ld", \ + #define FAIL_MSG "%d/%02d/%02d (%2d) : %lld", \ year, month, mday, yday, days KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG); diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 00e2ca7374ec..e0a798923ce0 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -231,7 +231,7 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t) if (!pm_trace_rtc_valid()) return -EIO; - ret = mc146818_get_time(t); + ret = mc146818_get_time(t, 1000); if (ret < 0) { dev_err_ratelimited(dev, "unable to read current time\n"); return ret; @@ -292,7 +292,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) /* This not only a rtc_op, but also called directly */ if (!is_valid_irq(cmos->irq)) - return -EIO; + return -ETIMEDOUT; /* Basic alarms only support hour, minute, and seconds fields. * Some also support day and month, for alarms up to a year in @@ -307,7 +307,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) * * Use the mc146818_avoid_UIP() function to avoid this. */ - if (!mc146818_avoid_UIP(cmos_read_alarm_callback, &p)) + if (!mc146818_avoid_UIP(cmos_read_alarm_callback, 10, &p)) return -EIO; if (!(p.rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { @@ -556,8 +556,8 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) * * Use mc146818_avoid_UIP() to avoid this. */ - if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p)) - return -EIO; + if (!mc146818_avoid_UIP(cmos_set_alarm_callback, 10, &p)) + return -ETIMEDOUT; cmos->alarm_expires = rtc_tm_to_time64(&t->time); @@ -818,18 +818,24 @@ static void rtc_wake_off(struct device *dev) } #ifdef CONFIG_X86 -/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */ static void use_acpi_alarm_quirks(void) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (dmi_get_bios_year() < 2015) + return; + break; + case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: + if (dmi_get_bios_year() < 2021) + return; + break; + default: return; - + } if (!is_hpet_enabled()) return; - if (dmi_get_bios_year() < 2015) - return; - use_acpi_alarm = true; } #else diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index f1c09f1db044..651bf3c279c7 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -8,26 +8,31 @@ #include #endif +#define UIP_RECHECK_DELAY 100 /* usec */ +#define UIP_RECHECK_DELAY_MS (USEC_PER_MSEC / UIP_RECHECK_DELAY) +#define UIP_RECHECK_LOOPS_MS(x) (x / UIP_RECHECK_DELAY_MS) + /* * Execute a function while the UIP (Update-in-progress) bit of the RTC is - * unset. + * unset. The timeout is configurable by the caller in ms. * * Warning: callback may be executed more then once. */ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), + int timeout, void *param) { int i; unsigned long flags; unsigned char seconds; - for (i = 0; i < 100; i++) { + for (i = 0; UIP_RECHECK_LOOPS_MS(i) < timeout; i++) { spin_lock_irqsave(&rtc_lock, flags); /* * Check whether there is an update in progress during which the * readout is unspecified. The maximum update time is ~2ms. Poll - * every 100 usec for completion. + * for completion. * * Store the second value before checking UIP so a long lasting * NMI which happens to hit after the UIP check cannot make @@ -37,7 +42,7 @@ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { spin_unlock_irqrestore(&rtc_lock, flags); - udelay(100); + udelay(UIP_RECHECK_DELAY); continue; } @@ -56,7 +61,7 @@ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), */ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { spin_unlock_irqrestore(&rtc_lock, flags); - udelay(100); + udelay(UIP_RECHECK_DELAY); continue; } @@ -72,6 +77,10 @@ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), } spin_unlock_irqrestore(&rtc_lock, flags); + if (UIP_RECHECK_LOOPS_MS(i) >= 100) + pr_warn("Reading current time from RTC took around %li ms\n", + UIP_RECHECK_LOOPS_MS(i)); + return true; } return false; @@ -84,7 +93,7 @@ EXPORT_SYMBOL_GPL(mc146818_avoid_UIP); */ bool mc146818_does_rtc_work(void) { - return mc146818_avoid_UIP(NULL, NULL); + return mc146818_avoid_UIP(NULL, 1000, NULL); } EXPORT_SYMBOL_GPL(mc146818_does_rtc_work); @@ -130,15 +139,27 @@ static void mc146818_get_time_callback(unsigned char seconds, void *param_in) p->ctrl = CMOS_READ(RTC_CONTROL); } -int mc146818_get_time(struct rtc_time *time) +/** + * mc146818_get_time - Get the current time from the RTC + * @time: pointer to struct rtc_time to store the current time + * @timeout: timeout value in ms + * + * This function reads the current time from the RTC and stores it in the + * provided struct rtc_time. The timeout parameter specifies the maximum + * time to wait for the RTC to become ready. + * + * Return: 0 on success, -ETIMEDOUT if the RTC did not become ready within + * the specified timeout, or another error code if an error occurred. + */ +int mc146818_get_time(struct rtc_time *time, int timeout) { struct mc146818_get_time_callback_param p = { .time = time }; - if (!mc146818_avoid_UIP(mc146818_get_time_callback, &p)) { + if (!mc146818_avoid_UIP(mc146818_get_time_callback, timeout, &p)) { memset(time, 0, sizeof(*time)); - return -EIO; + return -ETIMEDOUT; } if (!(p.ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index f207de4a87a0..341d65acd715 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -8,9 +8,6 @@ * Copyright IBM Corp. 1999, 2009 */ -#define KMSG_COMPONENT "dasd" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - #include #include #include @@ -73,7 +70,8 @@ static void dasd_profile_init(struct dasd_profile *, struct dentry *); static void dasd_profile_exit(struct dasd_profile *); static void dasd_hosts_init(struct dentry *, struct dasd_device *); static void dasd_hosts_exit(struct dasd_device *); - +static int dasd_handle_autoquiesce(struct dasd_device *, struct dasd_ccw_req *, + unsigned int); /* * SECTION: Operations on the device structure. */ @@ -2327,7 +2325,7 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) /* Non-temporary stop condition will trigger fail fast */ if (device->stopped & ~DASD_STOPPED_PENDING && test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && - (!dasd_eer_enabled(device))) { + !dasd_eer_enabled(device) && device->aq_mask == 0) { cqr->status = DASD_CQR_FAILED; cqr->intrc = -ENOLINK; continue; @@ -2803,20 +2801,18 @@ restart: dasd_log_sense(cqr, &cqr->irb); } - /* First of all call extended error reporting. */ - if (dasd_eer_enabled(base) && - cqr->status == DASD_CQR_FAILED) { - dasd_eer_write(base, cqr, DASD_EER_FATALERROR); - - /* restart request */ + /* + * First call extended error reporting and check for autoquiesce + */ + spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); + if (cqr->status == DASD_CQR_FAILED && + dasd_handle_autoquiesce(base, cqr, DASD_EER_FATALERROR)) { cqr->status = DASD_CQR_FILLED; cqr->retries = 255; - spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); - dasd_device_set_stop_bits(base, DASD_STOPPED_QUIESCE); - spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), - flags); + spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); goto restart; } + spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); /* Process finished ERP request. */ if (cqr->refers) { @@ -2858,7 +2854,7 @@ static void __dasd_block_start_head(struct dasd_block *block) /* Non-temporary stop condition will trigger fail fast */ if (block->base->stopped & ~DASD_STOPPED_PENDING && test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && - (!dasd_eer_enabled(block->base))) { + !dasd_eer_enabled(block->base) && block->base->aq_mask == 0) { cqr->status = DASD_CQR_FAILED; cqr->intrc = -ENOLINK; dasd_schedule_block_bh(block); @@ -3391,8 +3387,7 @@ static void dasd_generic_auto_online(void *data, async_cookie_t cookie) ret = ccw_device_set_online(cdev); if (ret) - pr_warn("%s: Setting the DASD online failed with rc=%d\n", - dev_name(&cdev->dev), ret); + dev_warn(&cdev->dev, "Setting the DASD online failed with rc=%d\n", ret); } /* @@ -3479,8 +3474,11 @@ int dasd_generic_set_online(struct ccw_device *cdev, { struct dasd_discipline *discipline; struct dasd_device *device; + struct device *dev; int rc; + dev = &cdev->dev; + /* first online clears initial online feature flag */ dasd_set_feature(cdev, DASD_FEATURE_INITIAL_ONLINE, 0); device = dasd_create_device(cdev); @@ -3493,11 +3491,10 @@ int dasd_generic_set_online(struct ccw_device *cdev, /* Try to load the required module. */ rc = request_module(DASD_DIAG_MOD); if (rc) { - pr_warn("%s Setting the DASD online failed " - "because the required module %s " - "could not be loaded (rc=%d)\n", - dev_name(&cdev->dev), DASD_DIAG_MOD, - rc); + dev_warn(dev, "Setting the DASD online failed " + "because the required module %s " + "could not be loaded (rc=%d)\n", + DASD_DIAG_MOD, rc); dasd_delete_device(device); return -ENODEV; } @@ -3505,8 +3502,7 @@ int dasd_generic_set_online(struct ccw_device *cdev, /* Module init could have failed, so check again here after * request_module(). */ if (!dasd_diag_discipline_pointer) { - pr_warn("%s Setting the DASD online failed because of missing DIAG discipline\n", - dev_name(&cdev->dev)); + dev_warn(dev, "Setting the DASD online failed because of missing DIAG discipline\n"); dasd_delete_device(device); return -ENODEV; } @@ -3516,37 +3512,33 @@ int dasd_generic_set_online(struct ccw_device *cdev, dasd_delete_device(device); return -EINVAL; } + device->base_discipline = base_discipline; if (!try_module_get(discipline->owner)) { - module_put(base_discipline->owner); dasd_delete_device(device); return -EINVAL; } - device->base_discipline = base_discipline; device->discipline = discipline; /* check_device will allocate block device if necessary */ rc = discipline->check_device(device); if (rc) { - pr_warn("%s Setting the DASD online with discipline %s failed with rc=%i\n", - dev_name(&cdev->dev), discipline->name, rc); - module_put(discipline->owner); - module_put(base_discipline->owner); + dev_warn(dev, "Setting the DASD online with discipline %s failed with rc=%i\n", + discipline->name, rc); dasd_delete_device(device); return rc; } dasd_set_target_state(device, DASD_STATE_ONLINE); if (device->state <= DASD_STATE_KNOWN) { - pr_warn("%s Setting the DASD online failed because of a missing discipline\n", - dev_name(&cdev->dev)); + dev_warn(dev, "Setting the DASD online failed because of a missing discipline\n"); rc = -ENODEV; dasd_set_target_state(device, DASD_STATE_NEW); if (device->block) dasd_free_block(device->block); dasd_delete_device(device); - } else - pr_debug("dasd_generic device %s found\n", - dev_name(&cdev->dev)); + } else { + dev_dbg(dev, "dasd_generic device found\n"); + } wait_event(dasd_init_waitq, _wait_for_device(device)); @@ -3557,10 +3549,13 @@ EXPORT_SYMBOL_GPL(dasd_generic_set_online); int dasd_generic_set_offline(struct ccw_device *cdev) { + int max_count, open_count, rc; struct dasd_device *device; struct dasd_block *block; - int max_count, open_count, rc; unsigned long flags; + struct device *dev; + + dev = &cdev->dev; rc = 0; spin_lock_irqsave(get_ccwdev_lock(cdev), flags); @@ -3581,11 +3576,10 @@ int dasd_generic_set_offline(struct ccw_device *cdev) open_count = atomic_read(&device->block->open_count); if (open_count > max_count) { if (open_count > 0) - pr_warn("%s: The DASD cannot be set offline with open count %i\n", - dev_name(&cdev->dev), open_count); + dev_warn(dev, "The DASD cannot be set offline with open count %i\n", + open_count); else - pr_warn("%s: The DASD cannot be set offline while it is in use\n", - dev_name(&cdev->dev)); + dev_warn(dev, "The DASD cannot be set offline while it is in use\n"); rc = -EBUSY; goto out_err; } @@ -3682,8 +3676,8 @@ int dasd_generic_last_path_gone(struct dasd_device *device) dev_warn(&device->cdev->dev, "No operational channel path is left " "for the device\n"); DBF_DEV_EVENT(DBF_WARNING, device, "%s", "last path gone"); - /* First of all call extended error reporting. */ - dasd_eer_write(device, NULL, DASD_EER_NOPATH); + /* First call extended error reporting and check for autoquiesce. */ + dasd_handle_autoquiesce(device, NULL, DASD_EER_NOPATH); if (device->state < DASD_STATE_BASIC) return 0; @@ -3815,7 +3809,8 @@ void dasd_generic_path_event(struct ccw_device *cdev, int *path_event) "No verified channel paths remain for the device\n"); DBF_DEV_EVENT(DBF_WARNING, device, "%s", "last verified path gone"); - dasd_eer_write(device, NULL, DASD_EER_NOPATH); + /* First call extended error reporting and check for autoquiesce. */ + dasd_handle_autoquiesce(device, NULL, DASD_EER_NOPATH); dasd_device_set_stop_bits(device, DASD_STOPPED_DC_WAIT); } @@ -3837,7 +3832,8 @@ EXPORT_SYMBOL_GPL(dasd_generic_verify_path); void dasd_generic_space_exhaust(struct dasd_device *device, struct dasd_ccw_req *cqr) { - dasd_eer_write(device, NULL, DASD_EER_NOSPC); + /* First call extended error reporting and check for autoquiesce. */ + dasd_handle_autoquiesce(device, NULL, DASD_EER_NOSPC); if (device->state < DASD_STATE_BASIC) return; @@ -3931,6 +3927,31 @@ void dasd_schedule_requeue(struct dasd_device *device) } EXPORT_SYMBOL(dasd_schedule_requeue); +static int dasd_handle_autoquiesce(struct dasd_device *device, + struct dasd_ccw_req *cqr, + unsigned int reason) +{ + /* in any case write eer message with reason */ + if (dasd_eer_enabled(device)) + dasd_eer_write(device, cqr, reason); + + if (!test_bit(reason, &device->aq_mask)) + return 0; + + /* notify eer about autoquiesce */ + if (dasd_eer_enabled(device)) + dasd_eer_write(device, NULL, DASD_EER_AUTOQUIESCE); + + dev_info(&device->cdev->dev, + "The DASD has been put in the quiesce state\n"); + dasd_device_set_stop_bits(device, DASD_STOPPED_QUIESCE); + + if (device->features & DASD_FEATURE_REQUEUEQUIESCE) + dasd_schedule_requeue(device); + + return 1; +} + static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, int rdc_buffer_size, int magic) diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index d4d31cd11d26..d16c699b9ac6 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -387,6 +387,7 @@ void dasd_eer_write(struct dasd_device *device, struct dasd_ccw_req *cqr, break; case DASD_EER_NOPATH: case DASD_EER_NOSPC: + case DASD_EER_AUTOQUIESCE: dasd_eer_write_standard_trigger(device, NULL, id); break; case DASD_EER_STATECHANGE: diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index f50932518f83..00bcd177264a 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -464,6 +464,7 @@ extern struct dasd_discipline *dasd_diag_discipline_pointer; #define DASD_EER_STATECHANGE 3 #define DASD_EER_PPRCSUSPEND 4 #define DASD_EER_NOSPC 5 +#define DASD_EER_AUTOQUIESCE 31 /* DASD path handling */ @@ -641,6 +642,7 @@ struct dasd_device { struct dasd_format_entry format_entry; struct kset *paths_info; struct dasd_copy_relation *copy; + unsigned long aq_mask; }; struct dasd_block { diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index c533d1dadc6b..a5dba3829769 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -202,7 +202,8 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, return -EINVAL; if (cdev->private->state == DEV_STATE_NOT_OPER) return -ENODEV; - if (cdev->private->state == DEV_STATE_VERIFY) { + if (cdev->private->state == DEV_STATE_VERIFY || + cdev->private->flags.doverify) { /* Remember to fake irb when finished. */ if (!cdev->private->flags.fake_irb) { cdev->private->flags.fake_irb = FAKE_CMD_IRB; @@ -214,8 +215,7 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, } if (cdev->private->state != DEV_STATE_ONLINE || ((sch->schib.scsw.cmd.stctl & SCSW_STCTL_PRIM_STATUS) && - !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)) || - cdev->private->flags.doverify) + !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS))) return -EBUSY; ret = cio_set_options (sch, flags); if (ret) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 934515959ebf..86a8bd532489 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -425,6 +425,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n", __func__, nisc, isc, q->apqn); + vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1); status.response_code = AP_RESPONSE_INVALID_GISA; return status; } @@ -638,8 +639,7 @@ static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev) * Return: a boolean value indicating whether the KVM guest's APCB was changed * by the filtering or not. */ -static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, - struct ap_matrix_mdev *matrix_mdev) +static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev) { unsigned long apid, apqi, apqn; DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES); @@ -660,8 +660,9 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, bitmap_and(matrix_mdev->shadow_apcb.aqm, matrix_mdev->matrix.aqm, (unsigned long *)matrix_dev->info.aqm, AP_DOMAINS); - for_each_set_bit_inv(apid, apm, AP_DEVICES) { - for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) { + for_each_set_bit_inv(apid, matrix_mdev->shadow_apcb.apm, AP_DEVICES) { + for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm, + AP_DOMAINS) { /* * If the APQN is not bound to the vfio_ap device * driver, then we can't assign it to the guest's @@ -930,7 +931,6 @@ static ssize_t assign_adapter_store(struct device *dev, { int ret; unsigned long apid; - DECLARE_BITMAP(apm_delta, AP_DEVICES); struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&ap_perms_mutex); @@ -959,11 +959,8 @@ static ssize_t assign_adapter_store(struct device *dev, } vfio_ap_mdev_link_adapter(matrix_mdev, apid); - memset(apm_delta, 0, sizeof(apm_delta)); - set_bit_inv(apid, apm_delta); - if (vfio_ap_mdev_filter_matrix(apm_delta, - matrix_mdev->matrix.aqm, matrix_mdev)) + if (vfio_ap_mdev_filter_matrix(matrix_mdev)) vfio_ap_mdev_update_guest_apcb(matrix_mdev); ret = count; @@ -1139,7 +1136,6 @@ static ssize_t assign_domain_store(struct device *dev, { int ret; unsigned long apqi; - DECLARE_BITMAP(aqm_delta, AP_DOMAINS); struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&ap_perms_mutex); @@ -1168,11 +1164,8 @@ static ssize_t assign_domain_store(struct device *dev, } vfio_ap_mdev_link_domain(matrix_mdev, apqi); - memset(aqm_delta, 0, sizeof(aqm_delta)); - set_bit_inv(apqi, aqm_delta); - if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, aqm_delta, - matrix_mdev)) + if (vfio_ap_mdev_filter_matrix(matrix_mdev)) vfio_ap_mdev_update_guest_apcb(matrix_mdev); ret = count; @@ -1749,6 +1742,7 @@ static ssize_t status_show(struct device *dev, { ssize_t nchars = 0; struct vfio_ap_queue *q; + unsigned long apid, apqi; struct ap_matrix_mdev *matrix_mdev; struct ap_device *apdev = to_ap_dev(dev); @@ -1756,8 +1750,21 @@ static ssize_t status_show(struct device *dev, q = dev_get_drvdata(&apdev->device); matrix_mdev = vfio_ap_mdev_for_queue(q); + /* If the queue is assigned to the matrix mediated device, then + * determine whether it is passed through to a guest; otherwise, + * indicate that it is unassigned. + */ if (matrix_mdev) { - if (matrix_mdev->kvm) + apid = AP_QID_CARD(q->apqn); + apqi = AP_QID_QUEUE(q->apqn); + /* + * If the queue is passed through to the guest, then indicate + * that it is in use; otherwise, indicate that it is + * merely assigned to a matrix mediated device. + */ + if (matrix_mdev->kvm && + test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && + test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) nchars = scnprintf(buf, PAGE_SIZE, "%s\n", AP_QUEUE_IN_USE); else @@ -1858,11 +1865,22 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev) if (matrix_mdev) { vfio_ap_mdev_link_queue(matrix_mdev, q); - if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, - matrix_mdev->matrix.aqm, - matrix_mdev)) + /* + * If we're in the process of handling the adding of adapters or + * domains to the host's AP configuration, then let the + * vfio_ap device driver's on_scan_complete callback filter the + * matrix and update the guest's AP configuration after all of + * the new queue devices are probed. + */ + if (!bitmap_empty(matrix_mdev->apm_add, AP_DEVICES) || + !bitmap_empty(matrix_mdev->aqm_add, AP_DOMAINS)) + goto done; + + if (vfio_ap_mdev_filter_matrix(matrix_mdev)) vfio_ap_mdev_update_guest_apcb(matrix_mdev); } + +done: dev_set_drvdata(&apdev->device, q); release_update_locks_for_mdev(matrix_mdev); @@ -2211,34 +2229,22 @@ void vfio_ap_on_cfg_changed(struct ap_config_info *cur_cfg_info, static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev) { - bool do_hotplug = false; - int filter_domains = 0; - int filter_adapters = 0; - DECLARE_BITMAP(apm, AP_DEVICES); - DECLARE_BITMAP(aqm, AP_DOMAINS); + bool filter_domains, filter_adapters, filter_cdoms, do_hotplug = false; mutex_lock(&matrix_mdev->kvm->lock); mutex_lock(&matrix_dev->mdevs_lock); - filter_adapters = bitmap_and(apm, matrix_mdev->matrix.apm, - matrix_mdev->apm_add, AP_DEVICES); - filter_domains = bitmap_and(aqm, matrix_mdev->matrix.aqm, - matrix_mdev->aqm_add, AP_DOMAINS); + filter_adapters = bitmap_intersects(matrix_mdev->matrix.apm, + matrix_mdev->apm_add, AP_DEVICES); + filter_domains = bitmap_intersects(matrix_mdev->matrix.aqm, + matrix_mdev->aqm_add, AP_DOMAINS); + filter_cdoms = bitmap_intersects(matrix_mdev->matrix.adm, + matrix_mdev->adm_add, AP_DOMAINS); - if (filter_adapters && filter_domains) - do_hotplug |= vfio_ap_mdev_filter_matrix(apm, aqm, matrix_mdev); - else if (filter_adapters) - do_hotplug |= - vfio_ap_mdev_filter_matrix(apm, - matrix_mdev->shadow_apcb.aqm, - matrix_mdev); - else - do_hotplug |= - vfio_ap_mdev_filter_matrix(matrix_mdev->shadow_apcb.apm, - aqm, matrix_mdev); + if (filter_adapters || filter_domains) + do_hotplug = vfio_ap_mdev_filter_matrix(matrix_mdev); - if (bitmap_intersects(matrix_mdev->matrix.adm, matrix_mdev->adm_add, - AP_DOMAINS)) + if (filter_cdoms) do_hotplug |= vfio_ap_mdev_filter_cdoms(matrix_mdev); if (do_hotplug) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 28e34d155334..6f44963d34bb 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -617,6 +617,7 @@ static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc, { if (!zq || !try_module_get(zq->queue->ap_dev.device.driver->owner)) return NULL; + zcrypt_card_get(zc); zcrypt_queue_get(zq); get_device(&zq->queue->ap_dev.device); atomic_add(weight, &zc->load); @@ -636,6 +637,7 @@ static inline void zcrypt_drop_queue(struct zcrypt_card *zc, atomic_sub(weight, &zq->load); put_device(&zq->queue->ap_dev.device); zcrypt_queue_put(zq); + zcrypt_card_put(zc); module_put(mod); } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index c0f30cefec10..a41601139185 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -254,9 +254,10 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover) if (!recover) { hash_del(&addr->hnode); kfree(addr); - continue; + } else { + /* prepare for recovery */ + addr->disp_flag = QETH_DISP_ADDR_ADD; } - addr->disp_flag = QETH_DISP_ADDR_ADD; } mutex_unlock(&card->ip_lock); @@ -277,9 +278,11 @@ static void qeth_l3_recover_ip(struct qeth_card *card) if (addr->disp_flag == QETH_DISP_ADDR_ADD) { rc = qeth_l3_register_addr_entry(card, addr); - if (!rc) { + if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) { + /* keep it in the records */ addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; } else { + /* bad address */ hash_del(&addr->hnode); kfree(addr); } diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 03e71e3d5e5b..3b990cf2c195 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1285,7 +1285,7 @@ source "drivers/scsi/arm/Kconfig" config JAZZ_ESP bool "MIPS JAZZ FAS216 SCSI support" - depends on MACH_JAZZ && SCSI + depends on MACH_JAZZ && SCSI=y select SCSI_SPI_ATTRS help This is the driver for the onboard SCSI host adapter of MIPS Magnum diff --git a/drivers/scsi/arcmsr/arcmsr.h b/drivers/scsi/arcmsr/arcmsr.h index 07df255c4b1b..b513d4d9c35a 100644 --- a/drivers/scsi/arcmsr/arcmsr.h +++ b/drivers/scsi/arcmsr/arcmsr.h @@ -77,9 +77,13 @@ struct device_attribute; #ifndef PCI_DEVICE_ID_ARECA_1203 #define PCI_DEVICE_ID_ARECA_1203 0x1203 #endif +#ifndef PCI_DEVICE_ID_ARECA_1883 +#define PCI_DEVICE_ID_ARECA_1883 0x1883 +#endif #ifndef PCI_DEVICE_ID_ARECA_1884 #define PCI_DEVICE_ID_ARECA_1884 0x1884 #endif +#define PCI_DEVICE_ID_ARECA_1886_0 0x1886 #define PCI_DEVICE_ID_ARECA_1886 0x188A #define ARCMSR_HOURS (1000 * 60 * 60 * 4) #define ARCMSR_MINUTES (1000 * 60 * 60) diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index d3fb8a9c1c39..fc9d4005830b 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -214,8 +214,12 @@ static struct pci_device_id arcmsr_device_id_table[] = { .driver_data = ACB_ADAPTER_TYPE_A}, {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1880), .driver_data = ACB_ADAPTER_TYPE_C}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1883), + .driver_data = ACB_ADAPTER_TYPE_C}, {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1884), .driver_data = ACB_ADAPTER_TYPE_E}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1886_0), + .driver_data = ACB_ADAPTER_TYPE_F}, {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1886), .driver_data = ACB_ADAPTER_TYPE_F}, {0, 0}, /* Terminating entry */ @@ -4708,9 +4712,11 @@ static const char *arcmsr_info(struct Scsi_Host *host) case PCI_DEVICE_ID_ARECA_1680: case PCI_DEVICE_ID_ARECA_1681: case PCI_DEVICE_ID_ARECA_1880: + case PCI_DEVICE_ID_ARECA_1883: case PCI_DEVICE_ID_ARECA_1884: type = "SAS/SATA"; break; + case PCI_DEVICE_ID_ARECA_1886_0: case PCI_DEVICE_ID_ARECA_1886: type = "NVMe/SAS/SATA"; break; diff --git a/drivers/scsi/bfa/bfa.h b/drivers/scsi/bfa/bfa.h index 7bd2ba1ad4d1..f30fe324e6ec 100644 --- a/drivers/scsi/bfa/bfa.h +++ b/drivers/scsi/bfa/bfa.h @@ -20,7 +20,6 @@ struct bfa_s; typedef void (*bfa_isr_func_t) (struct bfa_s *bfa, struct bfi_msg_s *m); -typedef void (*bfa_cb_cbfn_status_t) (void *cbarg, bfa_status_t status); /* * Interrupt message handlers @@ -437,4 +436,12 @@ struct bfa_cb_pending_q_s { (__qe)->data = (__data); \ } while (0) +#define bfa_pending_q_init_status(__qe, __cbfn, __cbarg, __data) do { \ + bfa_q_qe_init(&((__qe)->hcb_qe.qe)); \ + (__qe)->hcb_qe.cbfn_status = (__cbfn); \ + (__qe)->hcb_qe.cbarg = (__cbarg); \ + (__qe)->hcb_qe.pre_rmv = BFA_TRUE; \ + (__qe)->data = (__data); \ +} while (0) + #endif /* __BFA_H__ */ diff --git a/drivers/scsi/bfa/bfa_core.c b/drivers/scsi/bfa/bfa_core.c index 6846ca8f7313..3438d0b8ba06 100644 --- a/drivers/scsi/bfa/bfa_core.c +++ b/drivers/scsi/bfa/bfa_core.c @@ -1907,15 +1907,13 @@ bfa_comp_process(struct bfa_s *bfa, struct list_head *comp_q) struct list_head *qe; struct list_head *qen; struct bfa_cb_qe_s *hcb_qe; - bfa_cb_cbfn_status_t cbfn; list_for_each_safe(qe, qen, comp_q) { hcb_qe = (struct bfa_cb_qe_s *) qe; if (hcb_qe->pre_rmv) { /* qe is invalid after return, dequeue before cbfn() */ list_del(qe); - cbfn = (bfa_cb_cbfn_status_t)(hcb_qe->cbfn); - cbfn(hcb_qe->cbarg, hcb_qe->fw_status); + hcb_qe->cbfn_status(hcb_qe->cbarg, hcb_qe->fw_status); } else hcb_qe->cbfn(hcb_qe->cbarg, BFA_TRUE); } diff --git a/drivers/scsi/bfa/bfa_ioc.h b/drivers/scsi/bfa/bfa_ioc.h index 933a1c3890ff..5e568d6d7b26 100644 --- a/drivers/scsi/bfa/bfa_ioc.h +++ b/drivers/scsi/bfa/bfa_ioc.h @@ -361,14 +361,18 @@ struct bfa_reqq_wait_s { void *cbarg; }; -typedef void (*bfa_cb_cbfn_t) (void *cbarg, bfa_boolean_t complete); +typedef void (*bfa_cb_cbfn_t) (void *cbarg, bfa_boolean_t complete); +typedef void (*bfa_cb_cbfn_status_t) (void *cbarg, bfa_status_t status); /* * Generic BFA callback element. */ struct bfa_cb_qe_s { struct list_head qe; - bfa_cb_cbfn_t cbfn; + union { + bfa_cb_cbfn_status_t cbfn_status; + bfa_cb_cbfn_t cbfn; + }; bfa_boolean_t once; bfa_boolean_t pre_rmv; /* set for stack based qe(s) */ bfa_status_t fw_status; /* to access fw status in comp proc */ diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index be8dfbe13e90..524e4e6979c9 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -2135,8 +2135,7 @@ bfad_iocmd_fcport_get_stats(struct bfad_s *bfad, void *cmd) struct bfa_cb_pending_q_s cb_qe; init_completion(&fcomp.comp); - bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp, - &fcomp, &iocmd->stats); + bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, &iocmd->stats); spin_lock_irqsave(&bfad->bfad_lock, flags); iocmd->status = bfa_fcport_get_stats(&bfad->bfa, &cb_qe); spin_unlock_irqrestore(&bfad->bfad_lock, flags); @@ -2159,7 +2158,7 @@ bfad_iocmd_fcport_reset_stats(struct bfad_s *bfad, void *cmd) struct bfa_cb_pending_q_s cb_qe; init_completion(&fcomp.comp); - bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp, &fcomp, NULL); + bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, NULL); spin_lock_irqsave(&bfad->bfad_lock, flags); iocmd->status = bfa_fcport_clear_stats(&bfad->bfa, &cb_qe); @@ -2443,8 +2442,7 @@ bfad_iocmd_qos_get_stats(struct bfad_s *bfad, void *cmd) struct bfa_fcport_s *fcport = BFA_FCPORT_MOD(&bfad->bfa); init_completion(&fcomp.comp); - bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp, - &fcomp, &iocmd->stats); + bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, &iocmd->stats); spin_lock_irqsave(&bfad->bfad_lock, flags); WARN_ON(!bfa_ioc_get_fcmode(&bfad->bfa.ioc)); @@ -2474,8 +2472,7 @@ bfad_iocmd_qos_reset_stats(struct bfad_s *bfad, void *cmd) struct bfa_fcport_s *fcport = BFA_FCPORT_MOD(&bfad->bfa); init_completion(&fcomp.comp); - bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp, - &fcomp, NULL); + bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, NULL); spin_lock_irqsave(&bfad->bfad_lock, flags); WARN_ON(!bfa_ioc_get_fcmode(&bfad->bfa.ioc)); diff --git a/drivers/scsi/csiostor/csio_defs.h b/drivers/scsi/csiostor/csio_defs.h index c38017b4af98..e50e93e7fe5a 100644 --- a/drivers/scsi/csiostor/csio_defs.h +++ b/drivers/scsi/csiostor/csio_defs.h @@ -73,7 +73,21 @@ csio_list_deleted(struct list_head *list) #define csio_list_prev(elem) (((struct list_head *)(elem))->prev) /* State machine */ -typedef void (*csio_sm_state_t)(void *, uint32_t); +struct csio_lnode; + +/* State machine evets */ +enum csio_ln_ev { + CSIO_LNE_NONE = (uint32_t)0, + CSIO_LNE_LINKUP, + CSIO_LNE_FAB_INIT_DONE, + CSIO_LNE_LINK_DOWN, + CSIO_LNE_DOWN_LINK, + CSIO_LNE_LOGO, + CSIO_LNE_CLOSE, + CSIO_LNE_MAX_EVENT, +}; + +typedef void (*csio_sm_state_t)(struct csio_lnode *ln, enum csio_ln_ev evt); struct csio_sm { struct list_head sm_list; @@ -83,7 +97,7 @@ struct csio_sm { static inline void csio_set_state(void *smp, void *state) { - ((struct csio_sm *)smp)->sm_state = (csio_sm_state_t)state; + ((struct csio_sm *)smp)->sm_state = state; } static inline void diff --git a/drivers/scsi/csiostor/csio_lnode.c b/drivers/scsi/csiostor/csio_lnode.c index d5ac93897023..5b3ffefae476 100644 --- a/drivers/scsi/csiostor/csio_lnode.c +++ b/drivers/scsi/csiostor/csio_lnode.c @@ -1095,7 +1095,7 @@ csio_handle_link_down(struct csio_hw *hw, uint8_t portid, uint32_t fcfi, int csio_is_lnode_ready(struct csio_lnode *ln) { - return (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready)); + return (csio_get_state(ln) == csio_lns_ready); } /*****************************************************************************/ @@ -1366,15 +1366,15 @@ csio_free_fcfinfo(struct kref *kref) void csio_lnode_state_to_str(struct csio_lnode *ln, int8_t *str) { - if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_uninit)) { + if (csio_get_state(ln) == csio_lns_uninit) { strcpy(str, "UNINIT"); return; } - if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready)) { + if (csio_get_state(ln) == csio_lns_ready) { strcpy(str, "READY"); return; } - if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_offline)) { + if (csio_get_state(ln) == csio_lns_offline) { strcpy(str, "OFFLINE"); return; } diff --git a/drivers/scsi/csiostor/csio_lnode.h b/drivers/scsi/csiostor/csio_lnode.h index 372a67d122d3..607698a0f063 100644 --- a/drivers/scsi/csiostor/csio_lnode.h +++ b/drivers/scsi/csiostor/csio_lnode.h @@ -53,19 +53,6 @@ extern int csio_fcoe_rnodes; extern int csio_fdmi_enable; -/* State machine evets */ -enum csio_ln_ev { - CSIO_LNE_NONE = (uint32_t)0, - CSIO_LNE_LINKUP, - CSIO_LNE_FAB_INIT_DONE, - CSIO_LNE_LINK_DOWN, - CSIO_LNE_DOWN_LINK, - CSIO_LNE_LOGO, - CSIO_LNE_CLOSE, - CSIO_LNE_MAX_EVENT, -}; - - struct csio_fcf_info { struct list_head list; uint8_t priority; diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 8a4124e7d204..ddc048069af2 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -319,17 +319,16 @@ static void fcoe_ctlr_announce(struct fcoe_ctlr *fip) { struct fcoe_fcf *sel; struct fcoe_fcf *fcf; - unsigned long flags; mutex_lock(&fip->ctlr_mutex); - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); kfree_skb(fip->flogi_req); fip->flogi_req = NULL; list_for_each_entry(fcf, &fip->fcfs, list) fcf->flogi_sent = 0; - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); sel = fip->sel_fcf; if (sel && ether_addr_equal(sel->fcf_mac, fip->dest_addr)) @@ -700,7 +699,6 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, { struct fc_frame *fp; struct fc_frame_header *fh; - unsigned long flags; u16 old_xid; u8 op; u8 mac[ETH_ALEN]; @@ -734,11 +732,11 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, op = FIP_DT_FLOGI; if (fip->mode == FIP_MODE_VN2VN) break; - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); kfree_skb(fip->flogi_req); fip->flogi_req = skb; fip->flogi_req_send = 1; - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); schedule_work(&fip->timer_work); return -EINPROGRESS; case ELS_FDISC: @@ -1707,11 +1705,10 @@ static int fcoe_ctlr_flogi_send_locked(struct fcoe_ctlr *fip) static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) { struct fcoe_fcf *fcf; - unsigned long flags; int error; mutex_lock(&fip->ctlr_mutex); - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); LIBFCOE_FIP_DBG(fip, "re-sending FLOGI - reselect\n"); fcf = fcoe_ctlr_select(fip); if (!fcf || fcf->flogi_sent) { @@ -1722,7 +1719,7 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) fcoe_ctlr_solicit(fip, NULL); error = fcoe_ctlr_flogi_send_locked(fip); } - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); mutex_unlock(&fip->ctlr_mutex); return error; } @@ -1739,9 +1736,8 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip) { struct fcoe_fcf *fcf; - unsigned long flags; - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); fcf = fip->sel_fcf; if (!fcf || !fip->flogi_req_send) goto unlock; @@ -1768,7 +1764,7 @@ static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip) } else /* XXX */ LIBFCOE_FIP_DBG(fip, "No FCF selected - defer send\n"); unlock: - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); } /** diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 0c80ff9affa3..7ae56a2fe232 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -1565,6 +1565,11 @@ static irqreturn_t phy_up_v3_hw(int phy_no, struct hisi_hba *hisi_hba) } phy->port_id = port_id; + spin_lock(&phy->lock); + /* Delete timer and set phy_attached atomically */ + del_timer(&phy->timer); + phy->phy_attached = 1; + spin_unlock(&phy->lock); /* * Call pm_runtime_get_noresume() which pairs with @@ -1578,11 +1583,6 @@ static irqreturn_t phy_up_v3_hw(int phy_no, struct hisi_hba *hisi_hba) res = IRQ_HANDLED; - spin_lock(&phy->lock); - /* Delete timer and set phy_attached atomically */ - del_timer(&phy->timer); - phy->phy_attached = 1; - spin_unlock(&phy->lock); end: if (phy->reset_completion) complete(phy->reset_completion); diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 8b825364baad..c785493b105c 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -353,12 +353,13 @@ static void scsi_host_dev_release(struct device *dev) if (shost->shost_state == SHOST_CREATED) { /* - * Free the shost_dev device name here if scsi_host_alloc() - * and scsi_host_put() have been called but neither + * Free the shost_dev device name and remove the proc host dir + * here if scsi_host_{alloc,put}() have been called but neither * scsi_host_add() nor scsi_host_remove() has been called. * This avoids that the memory allocated for the shost_dev - * name is leaked. + * name as well as the proc dir structure are leaked. */ + scsi_proc_hostdir_rm(shost->hostt); kfree(dev_name(&shost->shost_dev)); } diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index 6370cdbfba08..0f0732d56800 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -3390,7 +3390,7 @@ static enum sci_status isci_io_request_build(struct isci_host *ihost, return SCI_FAILURE; } - return SCI_SUCCESS; + return status; } static struct isci_request *isci_request_from_tag(struct isci_host *ihost, u16 tag) diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index 945adca5e72f..05be0810b5e3 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -265,6 +265,11 @@ static int fc_fcp_send_abort(struct fc_fcp_pkt *fsp) if (!fsp->seq_ptr) return -EINVAL; + if (fsp->state & FC_SRB_ABORT_PENDING) { + FC_FCP_DBG(fsp, "abort already pending\n"); + return -EBUSY; + } + this_cpu_inc(fsp->lp->stats->FcpPktAborts); fsp->state |= FC_SRB_ABORT_PENDING; @@ -1671,7 +1676,7 @@ static void fc_fcp_rec_error(struct fc_fcp_pkt *fsp, struct fc_frame *fp) if (fsp->recov_retry++ < FC_MAX_RECOV_RETRY) fc_fcp_rec(fsp); else - fc_fcp_recovery(fsp, FC_ERROR); + fc_fcp_recovery(fsp, FC_TIMED_OUT); break; } fc_fcp_unlock_pkt(fsp); @@ -1690,11 +1695,12 @@ static void fc_fcp_recovery(struct fc_fcp_pkt *fsp, u8 code) fsp->status_code = code; fsp->cdb_status = 0; fsp->io_status = 0; - /* - * if this fails then we let the scsi command timer fire and - * scsi-ml escalate. - */ - fc_fcp_send_abort(fsp); + if (!fsp->cmd) + /* + * Only abort non-scsi commands; otherwise let the + * scsi command timer fire and scsi-ml escalate. + */ + fc_fcp_send_abort(fsp); } /** diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 63a23251fb1d..4b5ceba68e46 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1651,6 +1651,16 @@ out_err: /* ---------- Domain revalidation ---------- */ +static void sas_get_sas_addr_and_dev_type(struct smp_disc_resp *disc_resp, + u8 *sas_addr, + enum sas_device_type *type) +{ + memcpy(sas_addr, disc_resp->disc.attached_sas_addr, SAS_ADDR_SIZE); + *type = to_dev_type(&disc_resp->disc); + if (*type == SAS_PHY_UNUSED) + memset(sas_addr, 0, SAS_ADDR_SIZE); +} + static int sas_get_phy_discover(struct domain_device *dev, int phy_id, struct smp_disc_resp *disc_resp) { @@ -1704,13 +1714,8 @@ int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id, return -ENOMEM; res = sas_get_phy_discover(dev, phy_id, disc_resp); - if (res == 0) { - memcpy(sas_addr, disc_resp->disc.attached_sas_addr, - SAS_ADDR_SIZE); - *type = to_dev_type(&disc_resp->disc); - if (*type == 0) - memset(sas_addr, 0, SAS_ADDR_SIZE); - } + if (res == 0) + sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, type); kfree(disc_resp); return res; } @@ -1972,6 +1977,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, struct expander_device *ex = &dev->ex_dev; struct ex_phy *phy = &ex->ex_phy[phy_id]; enum sas_device_type type = SAS_PHY_UNUSED; + struct smp_disc_resp *disc_resp; u8 sas_addr[SAS_ADDR_SIZE]; char msg[80] = ""; int res; @@ -1983,33 +1989,41 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, SAS_ADDR(dev->sas_addr), phy_id, msg); memset(sas_addr, 0, SAS_ADDR_SIZE); - res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type); + disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE); + if (!disc_resp) + return -ENOMEM; + + res = sas_get_phy_discover(dev, phy_id, disc_resp); switch (res) { case SMP_RESP_NO_PHY: phy->phy_state = PHY_NOT_PRESENT; sas_unregister_devs_sas_addr(dev, phy_id, last); - return res; + goto out_free_resp; case SMP_RESP_PHY_VACANT: phy->phy_state = PHY_VACANT; sas_unregister_devs_sas_addr(dev, phy_id, last); - return res; + goto out_free_resp; case SMP_RESP_FUNC_ACC: break; case -ECOMM: break; default: - return res; + goto out_free_resp; } + if (res == 0) + sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, &type); + if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) { phy->phy_state = PHY_EMPTY; sas_unregister_devs_sas_addr(dev, phy_id, last); /* - * Even though the PHY is empty, for convenience we discover - * the PHY to update the PHY info, like negotiated linkrate. + * Even though the PHY is empty, for convenience we update + * the PHY info, like negotiated linkrate. */ - sas_ex_phy_discover(dev, phy_id); - return res; + if (res == 0) + sas_set_ex_phy(dev, phy_id, disc_resp); + goto out_free_resp; } else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) && dev_type_flutter(type, phy->attached_dev_type)) { struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id); @@ -2021,7 +2035,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, action = ", needs recovery"; pr_debug("ex %016llx phy%02d broadcast flutter%s\n", SAS_ADDR(dev->sas_addr), phy_id, action); - return res; + goto out_free_resp; } /* we always have to delete the old device when we went here */ @@ -2030,7 +2044,10 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, SAS_ADDR(phy->attached_sas_addr)); sas_unregister_devs_sas_addr(dev, phy_id, last); - return sas_discover_new(dev, phy_id); + res = sas_discover_new(dev, phy_id); +out_free_resp: + kfree(disc_resp); + return res; } /** diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 664ac3069c4b..dc5ac3cc70f6 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -33,6 +33,7 @@ struct lpfc_sli2_slim; #define ELX_MODEL_NAME_SIZE 80 +#define ELX_FW_NAME_SIZE 84 #define LPFC_PCI_DEV_LP 0x1 #define LPFC_PCI_DEV_OC 0x2 diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index b54fafb486e0..2373dad01603 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -3169,10 +3169,10 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job) } cmdwqe = &cmdiocbq->wqe; - memset(cmdwqe, 0, sizeof(union lpfc_wqe)); + memset(cmdwqe, 0, sizeof(*cmdwqe)); if (phba->sli_rev < LPFC_SLI_REV4) { rspwqe = &rspiocbq->wqe; - memset(rspwqe, 0, sizeof(union lpfc_wqe)); + memset(rspwqe, 0, sizeof(*rspwqe)); } INIT_LIST_HEAD(&head); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index f59de61803dc..1a0bafde34d8 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -14778,7 +14778,7 @@ out: int lpfc_sli4_request_firmware_update(struct lpfc_hba *phba, uint8_t fw_upgrade) { - uint8_t file_name[ELX_MODEL_NAME_SIZE]; + char file_name[ELX_FW_NAME_SIZE] = {0}; int ret; const struct firmware *fw; @@ -14787,7 +14787,7 @@ lpfc_sli4_request_firmware_update(struct lpfc_hba *phba, uint8_t fw_upgrade) LPFC_SLI_INTF_IF_TYPE_2) return -EPERM; - snprintf(file_name, ELX_MODEL_NAME_SIZE, "%s.grp", phba->ModelName); + scnprintf(file_name, sizeof(file_name), "%s.grp", phba->ModelName); if (fw_upgrade == INT_FW_UPGRADE) { ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_UEVENT, diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index f7cfac0da9b6..1c64da3b2e9f 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1586,7 +1586,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba) wqe = &nvmewqe->wqe; /* Initialize WQE */ - memset(wqe, 0, sizeof(union lpfc_wqe)); + memset(wqe, 0, sizeof(*wqe)); ctx_buf->iocbq->cmd_dmabuf = NULL; spin_lock(&phba->sli4_hba.sgl_list_lock); diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 7aac9fc71967..0bb7e164b525 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1919,7 +1919,7 @@ out: * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datasegcnt, struct lpfc_io_buf *lpfc_cmd) @@ -1927,8 +1927,8 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct scatterlist *sgde = NULL; /* s/g data entry */ struct sli4_sge_diseed *diseed = NULL; dma_addr_t physaddr; - int i = 0, num_sge = 0, status; - uint32_t reftag; + int i = 0, status; + uint32_t reftag, num_sge = 0; uint8_t txop, rxop; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint32_t rc; @@ -2100,7 +2100,7 @@ out: * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datacnt, int protcnt, struct lpfc_io_buf *lpfc_cmd) @@ -2124,8 +2124,8 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, uint32_t rc; #endif uint32_t checking = 1; - uint32_t dma_offset = 0; - int num_sge = 0, j = 2; + uint32_t dma_offset = 0, num_sge = 0; + int j = 2; struct sli4_hybrid_sgl *sgl_xtra = NULL; sgpe = scsi_prot_sglist(sc); diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index d2c7de804b99..41636c4c43af 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -1886,7 +1886,8 @@ static int mpi3mr_create_op_reply_q(struct mpi3mr_ioc *mrioc, u16 qidx) reply_qid = qidx + 1; op_reply_q->num_replies = MPI3MR_OP_REP_Q_QD; - if (!mrioc->pdev->revision) + if ((mrioc->pdev->device == MPI3_MFGPAGE_DEVID_SAS4116) && + !mrioc->pdev->revision) op_reply_q->num_replies = MPI3MR_OP_REP_Q_QD4K; op_reply_q->ci = 0; op_reply_q->ephase = 1; diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 85f5b349c7e4..7a6b006e70c8 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -4963,7 +4963,10 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) mpi3mr_init_drv_cmd(&mrioc->evtack_cmds[i], MPI3MR_HOSTTAG_EVTACKCMD_MIN + i); - if (pdev->revision) + if ((pdev->device == MPI3_MFGPAGE_DEVID_SAS4116) && + !pdev->revision) + mrioc->enable_segqueue = false; + else mrioc->enable_segqueue = true; init_waitqueue_head(&mrioc->reset_waitq); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 809be43f440d..8e6ac08e553b 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -7398,7 +7398,9 @@ _base_wait_for_iocstate(struct MPT3SAS_ADAPTER *ioc, int timeout) return -EFAULT; } - issue_diag_reset: + return 0; + +issue_diag_reset: rc = _base_diag_reset(ioc); return rc; } diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 07fbaa452d8a..0d414c1aa84e 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2741,7 +2741,13 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport) return; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { - qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + /* Will wait for wind down of adapter */ + ql_dbg(ql_dbg_aer, fcport->vha, 0x900c, + "%s pci offline detected (id %06x)\n", __func__, + fcport->d_id.b24); + qla_pci_set_eeh_busy(fcport->vha); + qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, + 0, WAIT_TARGET); return; } } @@ -2763,7 +2769,11 @@ qla2x00_terminate_rport_io(struct fc_rport *rport) vha = fcport->vha; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { - qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + /* Will wait for wind down of adapter */ + ql_dbg(ql_dbg_aer, fcport->vha, 0x900b, + "%s pci offline detected (id %06x)\n", __func__, + fcport->d_id.b24); + qla_pci_set_eeh_busy(vha); qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, 0, WAIT_TARGET); return; diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 1713588f671f..31c451daeeb8 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -83,7 +83,7 @@ typedef union { #include "qla_nvme.h" #define QLA2XXX_DRIVER_NAME "qla2xxx" #define QLA2XXX_APIDEV "ql2xapidev" -#define QLA2XXX_MANUFACTURER "Marvell Semiconductor, Inc." +#define QLA2XXX_MANUFACTURER "Marvell" /* * We have MAILBOX_REGISTER_COUNT sized arrays in a few places, diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 2e4537f9e5b5..73cd869caf60 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -44,7 +44,7 @@ extern int qla2x00_fabric_login(scsi_qla_host_t *, fc_port_t *, uint16_t *); extern int qla2x00_local_device_login(scsi_qla_host_t *, fc_port_t *); extern int qla24xx_els_dcmd_iocb(scsi_qla_host_t *, int, port_id_t); -extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *, bool); +extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *); extern void qla2x00_els_dcmd2_free(scsi_qla_host_t *vha, struct els_plogi *els_plogi); diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 884ed77259f8..c64e44964d84 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1187,8 +1187,12 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - /* ref: INIT */ - kref_put(&sp->cmd_kref, qla2x00_sp_release); + /* + * use qla24xx_async_gnl_sp_done to purge all pending gnl request. + * kref_put is call behind the scene. + */ + sp->u.iocb_cmd.u.mbx.in_mb[0] = MBS_COMMAND_ERROR; + qla24xx_async_gnl_sp_done(sp, QLA_COMMAND_ERROR); fcport->flags &= ~(FCF_ASYNC_SENT); done: fcport->flags &= ~(FCF_ASYNC_ACTIVE); @@ -2666,6 +2670,40 @@ exit: return rval; } +static void qla_enable_fce_trace(scsi_qla_host_t *vha) +{ + int rval; + struct qla_hw_data *ha = vha->hw; + + if (ha->fce) { + ha->flags.fce_enabled = 1; + memset(ha->fce, 0, fce_calc_size(ha->fce_bufs)); + rval = qla2x00_enable_fce_trace(vha, + ha->fce_dma, ha->fce_bufs, ha->fce_mb, &ha->fce_bufs); + + if (rval) { + ql_log(ql_log_warn, vha, 0x8033, + "Unable to reinitialize FCE (%d).\n", rval); + ha->flags.fce_enabled = 0; + } + } +} + +static void qla_enable_eft_trace(scsi_qla_host_t *vha) +{ + int rval; + struct qla_hw_data *ha = vha->hw; + + if (ha->eft) { + memset(ha->eft, 0, EFT_SIZE); + rval = qla2x00_enable_eft_trace(vha, ha->eft_dma, EFT_NUM_BUFFERS); + + if (rval) { + ql_log(ql_log_warn, vha, 0x8034, + "Unable to reinitialize EFT (%d).\n", rval); + } + } +} /* * qla2x00_initialize_adapter * Initialize board. @@ -3669,9 +3707,8 @@ qla24xx_chip_diag(scsi_qla_host_t *vha) } static void -qla2x00_init_fce_trace(scsi_qla_host_t *vha) +qla2x00_alloc_fce_trace(scsi_qla_host_t *vha) { - int rval; dma_addr_t tc_dma; void *tc; struct qla_hw_data *ha = vha->hw; @@ -3700,27 +3737,17 @@ qla2x00_init_fce_trace(scsi_qla_host_t *vha) return; } - rval = qla2x00_enable_fce_trace(vha, tc_dma, FCE_NUM_BUFFERS, - ha->fce_mb, &ha->fce_bufs); - if (rval) { - ql_log(ql_log_warn, vha, 0x00bf, - "Unable to initialize FCE (%d).\n", rval); - dma_free_coherent(&ha->pdev->dev, FCE_SIZE, tc, tc_dma); - return; - } - ql_dbg(ql_dbg_init, vha, 0x00c0, "Allocated (%d KB) for FCE...\n", FCE_SIZE / 1024); - ha->flags.fce_enabled = 1; ha->fce_dma = tc_dma; ha->fce = tc; + ha->fce_bufs = FCE_NUM_BUFFERS; } static void -qla2x00_init_eft_trace(scsi_qla_host_t *vha) +qla2x00_alloc_eft_trace(scsi_qla_host_t *vha) { - int rval; dma_addr_t tc_dma; void *tc; struct qla_hw_data *ha = vha->hw; @@ -3745,14 +3772,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha) return; } - rval = qla2x00_enable_eft_trace(vha, tc_dma, EFT_NUM_BUFFERS); - if (rval) { - ql_log(ql_log_warn, vha, 0x00c2, - "Unable to initialize EFT (%d).\n", rval); - dma_free_coherent(&ha->pdev->dev, EFT_SIZE, tc, tc_dma); - return; - } - ql_dbg(ql_dbg_init, vha, 0x00c3, "Allocated (%d KB) EFT ...\n", EFT_SIZE / 1024); @@ -3760,13 +3779,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha) ha->eft = tc; } -static void -qla2x00_alloc_offload_mem(scsi_qla_host_t *vha) -{ - qla2x00_init_fce_trace(vha); - qla2x00_init_eft_trace(vha); -} - void qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) { @@ -3821,10 +3833,10 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) if (ha->tgt.atio_ring) mq_size += ha->tgt.atio_q_length * sizeof(request_t); - qla2x00_init_fce_trace(vha); + qla2x00_alloc_fce_trace(vha); if (ha->fce) fce_size = sizeof(struct qla2xxx_fce_chain) + FCE_SIZE; - qla2x00_init_eft_trace(vha); + qla2x00_alloc_eft_trace(vha); if (ha->eft) eft_size = EFT_SIZE; } @@ -4254,7 +4266,6 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) struct qla_hw_data *ha = vha->hw; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; unsigned long flags; - uint16_t fw_major_version; int done_once = 0; if (IS_P3P_TYPE(ha)) { @@ -4321,7 +4332,6 @@ execute_fw_with_lr: goto failed; enable_82xx_npiv: - fw_major_version = ha->fw_major_version; if (IS_P3P_TYPE(ha)) qla82xx_check_md_needed(vha); else @@ -4350,12 +4360,11 @@ enable_82xx_npiv: if (rval != QLA_SUCCESS) goto failed; - if (!fw_major_version && !(IS_P3P_TYPE(ha))) - qla2x00_alloc_offload_mem(vha); - if (ql2xallocfwdump && !(IS_P3P_TYPE(ha))) qla2x00_alloc_fw_dump(vha); + qla_enable_fce_trace(vha); + qla_enable_eft_trace(vha); } else { goto failed; } @@ -7540,12 +7549,12 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) int qla2x00_abort_isp(scsi_qla_host_t *vha) { - int rval; uint8_t status = 0; struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *vp, *tvp; struct req_que *req = ha->req_q_map[0]; unsigned long flags; + fc_port_t *fcport; if (vha->flags.online) { qla2x00_abort_isp_cleanup(vha); @@ -7614,6 +7623,15 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) "ISP Abort - ISP reg disconnect post nvmram config, exiting.\n"); return status; } + + /* User may have updated [fcp|nvme] prefer in flash */ + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (NVME_PRIORITY(ha, fcport)) + fcport->do_prli_nvme = 1; + else + fcport->do_prli_nvme = 0; + } + if (!qla2x00_restart_isp(vha)) { clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags); @@ -7634,31 +7652,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) if (IS_QLA81XX(ha) || IS_QLA8031(ha)) qla2x00_get_fw_version(vha); - if (ha->fce) { - ha->flags.fce_enabled = 1; - memset(ha->fce, 0, - fce_calc_size(ha->fce_bufs)); - rval = qla2x00_enable_fce_trace(vha, - ha->fce_dma, ha->fce_bufs, ha->fce_mb, - &ha->fce_bufs); - if (rval) { - ql_log(ql_log_warn, vha, 0x8033, - "Unable to reinitialize FCE " - "(%d).\n", rval); - ha->flags.fce_enabled = 0; - } - } - if (ha->eft) { - memset(ha->eft, 0, EFT_SIZE); - rval = qla2x00_enable_eft_trace(vha, - ha->eft_dma, EFT_NUM_BUFFERS); - if (rval) { - ql_log(ql_log_warn, vha, 0x8034, - "Unable to reinitialize EFT " - "(%d).\n", rval); - } - } } else { /* failed the ISP abort */ vha->flags.online = 1; if (test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { @@ -7708,6 +7702,14 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) atomic_inc(&vp->vref_count); spin_unlock_irqrestore(&ha->vport_slock, flags); + /* User may have updated [fcp|nvme] prefer in flash */ + list_for_each_entry(fcport, &vp->vp_fcports, list) { + if (NVME_PRIORITY(ha, fcport)) + fcport->do_prli_nvme = 1; + else + fcport->do_prli_nvme = 0; + } + qla2x00_vp_abort_isp(vp); spin_lock_irqsave(&ha->vport_slock, flags); diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 9e524d52dc86..7bccd525ee19 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2588,6 +2588,33 @@ void qla2x00_sp_release(struct kref *kref) { struct srb *sp = container_of(kref, struct srb, cmd_kref); + struct scsi_qla_host *vha = sp->vha; + + switch (sp->type) { + case SRB_CT_PTHRU_CMD: + /* GPSC & GFPNID use fcport->ct_desc.ct_sns for both req & rsp */ + if (sp->u.iocb_cmd.u.ctarg.req && + (!sp->fcport || + sp->u.iocb_cmd.u.ctarg.req != sp->fcport->ct_desc.ct_sns)) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.req_allocated_size, + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } + if (sp->u.iocb_cmd.u.ctarg.rsp && + (!sp->fcport || + sp->u.iocb_cmd.u.ctarg.rsp != sp->fcport->ct_desc.ct_sns)) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.rsp_allocated_size, + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + break; + default: + break; + } sp->free(sp); } @@ -2611,7 +2638,8 @@ static void qla2x00_els_dcmd_sp_free(srb_t *sp) { struct srb_iocb *elsio = &sp->u.iocb_cmd; - kfree(sp->fcport); + if (sp->fcport) + qla2x00_free_fcport(sp->fcport); if (elsio->u.els_logo.els_logo_pyld) dma_free_coherent(&sp->vha->hw->pdev->dev, DMA_POOL_SIZE, @@ -2693,7 +2721,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) { - kfree(fcport); + qla2x00_free_fcport(fcport); ql_log(ql_log_info, vha, 0x70e6, "SRB allocation failed\n"); return -ENOMEM; @@ -2724,6 +2752,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_logo.els_logo_pyld) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); + qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -2748,6 +2777,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (rval != QLA_SUCCESS) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); + qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -3013,7 +3043,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, - fc_port_t *fcport, bool wait) + fc_port_t *fcport) { srb_t *sp; struct srb_iocb *elsio = NULL; @@ -3028,8 +3058,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!sp) { ql_log(ql_log_info, vha, 0x70e6, "SRB allocation failed\n"); - fcport->flags &= ~FCF_ASYNC_ACTIVE; - return -ENOMEM; + goto done; } fcport->flags |= FCF_ASYNC_SENT; @@ -3038,9 +3067,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, ql_dbg(ql_dbg_io, vha, 0x3073, "%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24); - if (wait) - sp->flags = SRB_WAKEUP_ON_COMP; - sp->type = SRB_ELS_DCMD; sp->name = "ELS_DCMD"; sp->fcport = fcport; @@ -3056,7 +3082,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_plogi.els_plogi_pyld) { rval = QLA_FUNCTION_FAILED; - goto out; + goto done_free_sp; } resp_ptr = elsio->u.els_plogi.els_resp_pyld = @@ -3065,7 +3091,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_plogi.els_resp_pyld) { rval = QLA_FUNCTION_FAILED; - goto out; + goto done_free_sp; } ql_dbg(ql_dbg_io, vha, 0x3073, "PLOGI %p %p\n", ptr, resp_ptr); @@ -3080,7 +3106,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (els_opcode == ELS_DCMD_PLOGI && DBELL_ACTIVE(vha)) { struct fc_els_flogi *p = ptr; - p->fl_csp.sp_features |= cpu_to_be16(FC_SP_FT_SEC); } @@ -3089,10 +3114,11 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, (uint8_t *)elsio->u.els_plogi.els_plogi_pyld, sizeof(*elsio->u.els_plogi.els_plogi_pyld)); - init_completion(&elsio->u.els_plogi.comp); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { - rval = QLA_FUNCTION_FAILED; + fcport->flags |= FCF_LOGIN_NEEDED; + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + goto done_free_sp; } else { ql_dbg(ql_dbg_disc, vha, 0x3074, "%s PLOGI sent, hdl=%x, loopid=%x, to port_id %06x from port_id %06x\n", @@ -3100,21 +3126,15 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, fcport->d_id.b24, vha->d_id.b24); } - if (wait) { - wait_for_completion(&elsio->u.els_plogi.comp); + return rval; - if (elsio->u.els_plogi.comp_status != CS_COMPLETE) - rval = QLA_FUNCTION_FAILED; - } else { - goto done; - } - -out: - fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); +done_free_sp: qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi); /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); done: + fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); + qla2x00_set_fcport_disc_state(fcport, DSC_DELETED); return rval; } @@ -3916,7 +3936,7 @@ qla2x00_start_sp(srb_t *sp) return -EAGAIN; } - pkt = __qla2x00_alloc_iocbs(sp->qpair, sp); + pkt = qla2x00_alloc_iocbs_ready(sp->qpair, sp); if (!pkt) { rval = -EAGAIN; ql_log(ql_log_warn, vha, 0x700c, diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index f794f4363a38..1fd9485985f2 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -194,7 +194,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) if (ha->flags.purge_mbox || chip_reset != ha->chip_reset || ha->flags.eeh_busy) { ql_log(ql_log_warn, vha, 0xd035, - "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", + "Purge mbox: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]); rval = QLA_ABORTED; goto premature_exit; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 25ca0544b963..25d0c2bfdd74 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5562,7 +5562,7 @@ qla2x00_do_work(struct scsi_qla_host *vha) break; case QLA_EVT_ELS_PLOGI: qla24xx_els_dcmd2_iocb(vha, ELS_DCMD_PLOGI, - e->u.fcport.fcport, false); + e->u.fcport.fcport); break; case QLA_EVT_SA_REPLACE: rc = qla24xx_issue_sa_replace_iocb(vha, e); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 5a5beb41786e..043cfa10c716 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1079,6 +1079,16 @@ void qlt_free_session_done(struct work_struct *work) "%s: sess %p logout completed\n", __func__, sess); } + /* check for any straggling io left behind */ + if (!(sess->flags & FCF_FCP2_DEVICE) && + qla2x00_eh_wait_for_pending_commands(sess->vha, sess->d_id.b24, 0, WAIT_TARGET)) { + ql_log(ql_log_warn, vha, 0x3027, + "IO not return. Resetting.\n"); + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + qla2x00_wait_for_chip_reset(vha); + } + if (sess->logo_ack_needed) { sess->logo_ack_needed = 0; qla24xx_async_notify_ack(vha, sess, diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 3cda5d26b66c..e70ab8db3014 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -328,21 +328,39 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer, return result + 4; } +enum scsi_vpd_parameters { + SCSI_VPD_HEADER_SIZE = 4, + SCSI_VPD_LIST_SIZE = 36, +}; + static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page) { - unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4); + unsigned char vpd[SCSI_VPD_LIST_SIZE] __aligned(4); int result; if (sdev->no_vpd_size) return SCSI_DEFAULT_VPD_LEN; + /* + * Fetch the supported pages VPD and validate that the requested page + * number is present. + */ + if (page != 0) { + result = scsi_vpd_inquiry(sdev, vpd, 0, sizeof(vpd)); + if (result < SCSI_VPD_HEADER_SIZE) + return 0; + + result -= SCSI_VPD_HEADER_SIZE; + if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result)) + return 0; + } /* * Fetch the VPD page header to find out how big the page * is. This is done to prevent problems on legacy devices * which can not handle allocation lengths as large as * potentially requested by the caller. */ - result = scsi_vpd_inquiry(sdev, vpd_header, page, sizeof(vpd_header)); + result = scsi_vpd_inquiry(sdev, vpd, page, SCSI_VPD_HEADER_SIZE); if (result < 0) return 0; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 9a289d6f2e5e..cfa6f0edff17 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -61,11 +61,11 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd); static enum scsi_disposition scsi_try_to_abort_cmd(struct scsi_host_template *, struct scsi_cmnd *); -void scsi_eh_wakeup(struct Scsi_Host *shost) +void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy) { lockdep_assert_held(shost->host_lock); - if (scsi_host_busy(shost) == shost->host_failed) { + if (busy == shost->host_failed) { trace_scsi_eh_wakeup(shost); wake_up_process(shost->ehandler); SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost, @@ -88,7 +88,7 @@ void scsi_schedule_eh(struct Scsi_Host *shost) if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 || scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) { shost->host_eh_scheduled++; - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); } spin_unlock_irqrestore(shost->host_lock, flags); @@ -277,11 +277,12 @@ static void scsi_eh_inc_host_failed(struct rcu_head *head) { struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu); struct Scsi_Host *shost = scmd->device->host; + unsigned int busy = scsi_host_busy(shost); unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); shost->host_failed++; - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, busy); spin_unlock_irqrestore(shost->host_lock, flags); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d25e1c247253..edd296f950a3 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -185,39 +185,37 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) __scsi_queue_insert(cmd, reason, true); } - /** - * __scsi_execute - insert request and wait for the result - * @sdev: scsi device + * scsi_execute_cmd - insert request and wait for the result + * @sdev: scsi_device * @cmd: scsi command - * @data_direction: data direction + * @opf: block layer request cmd_flags * @buffer: data buffer * @bufflen: len of buffer - * @sense: optional sense buffer - * @sshdr: optional decoded sense header * @timeout: request timeout in HZ * @retries: number of times to retry request - * @flags: flags for ->cmd_flags - * @rq_flags: flags for ->rq_flags - * @resid: optional residual length + * @args: Optional args. See struct definition for field descriptions * * Returns the scsi_cmnd result field if a command was executed, or a negative * Linux error code if we didn't get that far. */ -int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, - int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, struct scsi_sense_hdr *sshdr, - int timeout, int retries, blk_opf_t flags, - req_flags_t rq_flags, int *resid) +int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, + blk_opf_t opf, void *buffer, unsigned int bufflen, + int timeout, int retries, + const struct scsi_exec_args *args) { + static const struct scsi_exec_args default_args; struct request *req; struct scsi_cmnd *scmd; int ret; - req = scsi_alloc_request(sdev->request_queue, - data_direction == DMA_TO_DEVICE ? - REQ_OP_DRV_OUT : REQ_OP_DRV_IN, - rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0); + if (!args) + args = &default_args; + else if (WARN_ON_ONCE(args->sense && + args->sense_len != SCSI_SENSE_BUFFERSIZE)) + return -EINVAL; + + req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags); if (IS_ERR(req)) return PTR_ERR(req); @@ -232,8 +230,7 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, memcpy(scmd->cmnd, cmd, scmd->cmd_len); scmd->allowed = retries; req->timeout = timeout; - req->cmd_flags |= flags; - req->rq_flags |= rq_flags | RQF_QUIET; + req->rq_flags |= RQF_QUIET; /* * head injection *required* here otherwise quiesce won't work @@ -249,20 +246,21 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, if (unlikely(scmd->resid_len > 0 && scmd->resid_len <= bufflen)) memset(buffer + bufflen - scmd->resid_len, 0, scmd->resid_len); - if (resid) - *resid = scmd->resid_len; - if (sense && scmd->sense_len) - memcpy(sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); - if (sshdr) + if (args->resid) + *args->resid = scmd->resid_len; + if (args->sense) + memcpy(args->sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); + if (args->sshdr) scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len, - sshdr); + args->sshdr); + ret = scmd->result; out: blk_mq_free_request(req); return ret; } -EXPORT_SYMBOL(__scsi_execute); +EXPORT_SYMBOL(scsi_execute_cmd); /* * Wake up the error handler if necessary. Avoid as follows that the error @@ -280,9 +278,11 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd) rcu_read_lock(); __clear_bit(SCMD_STATE_INFLIGHT, &cmd->state); if (unlikely(scsi_host_in_recovery(shost))) { + unsigned int busy = scsi_host_busy(shost); + spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, busy); spin_unlock_irqrestore(shost->host_lock, flags); } rcu_read_unlock(); diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index b14545acb40f..9b5fb30e684b 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -86,7 +86,7 @@ extern void scmd_eh_abort_handler(struct work_struct *work); extern enum blk_eh_timer_return scsi_timeout(struct request *req); extern int scsi_error_handler(void *host); extern enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *cmd); -extern void scsi_eh_wakeup(struct Scsi_Host *shost); +extern void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy); extern void scsi_eh_scmd_add(struct scsi_cmnd *); void scsi_eh_ready_devs(struct Scsi_Host *shost, struct list_head *work_q, diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index bab00b65bc9d..852d509b19b2 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1611,6 +1611,40 @@ int scsi_add_device(struct Scsi_Host *host, uint channel, } EXPORT_SYMBOL(scsi_add_device); +int scsi_resume_device(struct scsi_device *sdev) +{ + struct device *dev = &sdev->sdev_gendev; + int ret = 0; + + device_lock(dev); + + /* + * Bail out if the device or its queue are not running. Otherwise, + * the rescan may block waiting for commands to be executed, with us + * holding the device lock. This can result in a potential deadlock + * in the power management core code when system resume is on-going. + */ + if (sdev->sdev_state != SDEV_RUNNING || + blk_queue_pm_only(sdev->request_queue)) { + ret = -EWOULDBLOCK; + goto unlock; + } + + if (dev->driver && try_module_get(dev->driver->owner)) { + struct scsi_driver *drv = to_scsi_driver(dev->driver); + + if (drv->resume) + ret = drv->resume(dev); + module_put(dev->driver->owner); + } + +unlock: + device_unlock(dev); + + return ret; +} +EXPORT_SYMBOL(scsi_resume_device); + int scsi_rescan_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index fa3718bd1c61..bde14b556ac0 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -110,6 +110,7 @@ static int sd_suspend_system(struct device *); static int sd_suspend_runtime(struct device *); static int sd_resume_system(struct device *); static int sd_resume_runtime(struct device *); +static int sd_resume(struct device *); static void sd_rescan(struct device *); static blk_status_t sd_init_command(struct scsi_cmnd *SCpnt); static void sd_uninit_command(struct scsi_cmnd *SCpnt); @@ -691,6 +692,7 @@ static struct scsi_driver sd_template = { .pm = &sd_pm_ops, }, .rescan = sd_rescan, + .resume = sd_resume, .init_command = sd_init_command, .uninit_command = sd_uninit_command, .done = sd_done, @@ -3284,6 +3286,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, return true; } +static void sd_read_block_zero(struct scsi_disk *sdkp) +{ + unsigned int buf_len = sdkp->device->sector_size; + char *buffer, cmd[10] = { }; + + buffer = kmalloc(buf_len, GFP_KERNEL); + if (!buffer) + return; + + cmd[0] = READ_10; + put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + + scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, + SD_TIMEOUT, sdkp->max_retries, NULL); + kfree(buffer); +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3323,7 +3343,13 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - + /* + * Some USB/UAS devices return generic values for mode pages + * until the media has been accessed. Trigger a READ operation + * to force the device to populate mode pages. + */ + if (sdp->read_before_ms) + sd_read_block_zero(sdkp); /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will @@ -3807,7 +3833,22 @@ static int sd_suspend_runtime(struct device *dev) return sd_suspend_common(dev, true); } -static int sd_resume(struct device *dev, bool runtime) +static int sd_resume(struct device *dev) +{ + struct scsi_disk *sdkp = dev_get_drvdata(dev); + + if (sdkp->device->no_start_on_resume) + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + + if (opal_unlock_from_suspend(sdkp->opal_dev)) { + sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); + return -EIO; + } + + return 0; +} + +static int sd_resume_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret = 0; @@ -3826,7 +3867,7 @@ static int sd_resume(struct device *dev, bool runtime) } if (!ret) { - opal_unlock_from_suspend(sdkp->opal_dev); + sd_resume(dev); sdkp->suspended = false; } @@ -3845,7 +3886,7 @@ static int sd_resume_system(struct device *dev) return 0; } - return sd_resume(dev, false); + return sd_resume_common(dev, false); } static int sd_resume_runtime(struct device *dev) @@ -3869,7 +3910,7 @@ static int sd_resume_runtime(struct device *dev) "Failed to clear sense data\n"); } - return sd_resume(dev, true); + return sd_resume_common(dev, true); } /** diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 47d487729635..e44f6bb25a8e 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -6449,8 +6449,11 @@ static void pqi_map_queues(struct Scsi_Host *shost) { struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); - blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + if (!ctrl_info->disable_managed_interrupts) + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], ctrl_info->pci_dev, 0); + else + return blk_mq_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT]); } static inline bool pqi_is_tape_changer_device(struct pqi_scsi_dev *device) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index c2d981d5a2dd..4fad9d85bd6f 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -326,6 +326,7 @@ enum storvsc_request_type { */ static int storvsc_ringbuffer_size = (128 * 1024); +static int aligned_ringbuffer_size; static u32 max_outstanding_req_per_channel; static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth); @@ -683,8 +684,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc) new_sc->next_request_id_callback = storvsc_next_request_id; ret = vmbus_open(new_sc, - storvsc_ringbuffer_size, - storvsc_ringbuffer_size, + aligned_ringbuffer_size, + aligned_ringbuffer_size, (void *)&props, sizeof(struct vmstorage_channel_properties), storvsc_on_channel_callback, new_sc); @@ -1964,7 +1965,7 @@ static int storvsc_probe(struct hv_device *device, dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1); stor_device->port_number = host->host_no; - ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc); + ret = storvsc_connect_to_vsp(device, aligned_ringbuffer_size, is_fc); if (ret) goto err_out1; @@ -2157,7 +2158,7 @@ static int storvsc_resume(struct hv_device *hv_dev) { int ret; - ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size, + ret = storvsc_connect_to_vsp(hv_dev, aligned_ringbuffer_size, hv_dev_is_fc(hv_dev)); return ret; } @@ -2191,8 +2192,9 @@ static int __init storvsc_drv_init(void) * the ring buffer indices) by the max request size (which is * vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64) */ + aligned_ringbuffer_size = VMBUS_RING_SIZE(storvsc_ringbuffer_size); max_outstanding_req_per_channel = - ((storvsc_ringbuffer_size - PAGE_SIZE) / + ((aligned_ringbuffer_size - PAGE_SIZE) / ALIGN(MAX_MULTIPAGE_BUFFER_PACKET + sizeof(struct vstor_packet) + sizeof(u64), sizeof(u64))); diff --git a/drivers/slimbus/core.c b/drivers/slimbus/core.c index 219483b79c09..37fd655994ef 100644 --- a/drivers/slimbus/core.c +++ b/drivers/slimbus/core.c @@ -436,8 +436,8 @@ static int slim_device_alloc_laddr(struct slim_device *sbdev, if (ret < 0) goto err; } else if (report_present) { - ret = ida_simple_get(&ctrl->laddr_ida, - 0, SLIM_LA_MANAGER - 1, GFP_KERNEL); + ret = ida_alloc_max(&ctrl->laddr_ida, + SLIM_LA_MANAGER - 1, GFP_KERNEL); if (ret < 0) goto err; diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c index 1d2b27e3ea63..b811446e0fa5 100644 --- a/drivers/soc/fsl/dpio/dpio-service.c +++ b/drivers/soc/fsl/dpio/dpio-service.c @@ -523,7 +523,7 @@ int dpaa2_io_service_enqueue_multiple_desc_fq(struct dpaa2_io *d, struct qbman_eq_desc *ed; int i, ret; - ed = kcalloc(sizeof(struct qbman_eq_desc), 32, GFP_KERNEL); + ed = kcalloc(32, sizeof(struct qbman_eq_desc), GFP_KERNEL); if (!ed) return -ENOMEM; diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 739e4eee6b75..7e9074519ad2 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -991,7 +991,7 @@ struct qman_portal { /* linked-list of CSCN handlers. */ struct list_head cgr_cbs; /* list lock */ - spinlock_t cgr_lock; + raw_spinlock_t cgr_lock; struct work_struct congestion_work; struct work_struct mr_work; char irqname[MAX_IRQNAME]; @@ -1281,7 +1281,7 @@ static int qman_create_portal(struct qman_portal *portal, /* if the given mask is NULL, assume all CGRs can be seen */ qman_cgrs_fill(&portal->cgrs[0]); INIT_LIST_HEAD(&portal->cgr_cbs); - spin_lock_init(&portal->cgr_lock); + raw_spin_lock_init(&portal->cgr_lock); INIT_WORK(&portal->congestion_work, qm_congestion_task); INIT_WORK(&portal->mr_work, qm_mr_process_task); portal->bits = 0; @@ -1456,11 +1456,14 @@ static void qm_congestion_task(struct work_struct *work) union qm_mc_result *mcr; struct qman_cgr *cgr; - spin_lock(&p->cgr_lock); + /* + * FIXME: QM_MCR_TIMEOUT is 10ms, which is too long for a raw spinlock! + */ + raw_spin_lock_irq(&p->cgr_lock); qm_mc_start(&p->p); qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION); if (!qm_mc_result_timeout(&p->p, &mcr)) { - spin_unlock(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); dev_crit(p->config->dev, "QUERYCONGESTION timeout\n"); qman_p_irqsource_add(p, QM_PIRQ_CSCI); return; @@ -1476,7 +1479,7 @@ static void qm_congestion_task(struct work_struct *work) list_for_each_entry(cgr, &p->cgr_cbs, node) if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid)) cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid)); - spin_unlock(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); qman_p_irqsource_add(p, QM_PIRQ_CSCI); } @@ -2440,7 +2443,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, preempt_enable(); cgr->chan = p->config->channel; - spin_lock(&p->cgr_lock); + raw_spin_lock_irq(&p->cgr_lock); if (opts) { struct qm_mcc_initcgr local_opts = *opts; @@ -2477,7 +2480,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, qman_cgrs_get(&p->cgrs[1], cgr->cgrid)) cgr->cb(p, cgr, 1); out: - spin_unlock(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); put_affine_portal(); return ret; } @@ -2512,7 +2515,7 @@ int qman_delete_cgr(struct qman_cgr *cgr) return -EINVAL; memset(&local_opts, 0, sizeof(struct qm_mcc_initcgr)); - spin_lock_irqsave(&p->cgr_lock, irqflags); + raw_spin_lock_irqsave(&p->cgr_lock, irqflags); list_del(&cgr->node); /* * If there are no other CGR objects for this CGRID in the list, @@ -2537,7 +2540,7 @@ int qman_delete_cgr(struct qman_cgr *cgr) /* add back to the list */ list_add(&cgr->node, &p->cgr_cbs); release_lock: - spin_unlock_irqrestore(&p->cgr_lock, irqflags); + raw_spin_unlock_irqrestore(&p->cgr_lock, irqflags); put_affine_portal(); return ret; } @@ -2577,9 +2580,9 @@ static int qman_update_cgr(struct qman_cgr *cgr, struct qm_mcc_initcgr *opts) if (!p) return -EINVAL; - spin_lock_irqsave(&p->cgr_lock, irqflags); + raw_spin_lock_irqsave(&p->cgr_lock, irqflags); ret = qm_modify_cgr(cgr, 0, opts); - spin_unlock_irqrestore(&p->cgr_lock, irqflags); + raw_spin_unlock_irqrestore(&p->cgr_lock, irqflags); put_affine_portal(); return ret; } diff --git a/drivers/soc/mediatek/mtk-pm-domains.c b/drivers/soc/mediatek/mtk-pm-domains.c index 474b272f9b02..832adb570b50 100644 --- a/drivers/soc/mediatek/mtk-pm-domains.c +++ b/drivers/soc/mediatek/mtk-pm-domains.c @@ -499,6 +499,11 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren goto err_put_node; } + /* recursive call to add all subdomains */ + ret = scpsys_add_subdomain(scpsys, child); + if (ret) + goto err_put_node; + ret = pm_genpd_add_subdomain(parent_pd, child_pd); if (ret) { dev_err(scpsys->dev, "failed to add %s subdomain to parent %s\n", @@ -508,11 +513,6 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren dev_dbg(scpsys->dev, "%s add subdomain: %s\n", parent_pd->name, child_pd->name); } - - /* recursive call to add all subdomains */ - ret = scpsys_add_subdomain(scpsys, child); - if (ret) - goto err_put_node; } return 0; @@ -526,9 +526,6 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd) { int ret; - if (scpsys_domain_is_on(pd)) - scpsys_power_off(&pd->genpd); - /* * We're in the error cleanup already, so we only complain, * but won't emit another error on top of the original one. @@ -538,6 +535,8 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd) dev_err(pd->scpsys->dev, "failed to remove domain '%s' : %d - state may be inconsistent\n", pd->genpd.name, ret); + if (scpsys_domain_is_on(pd)) + scpsys_power_off(&pd->genpd); clk_bulk_put(pd->num_clks, pd->clks); clk_bulk_put(pd->num_subsys_clks, pd->subsys_clks); diff --git a/drivers/soc/microchip/Kconfig b/drivers/soc/microchip/Kconfig index eb656b33156b..f19e74d342aa 100644 --- a/drivers/soc/microchip/Kconfig +++ b/drivers/soc/microchip/Kconfig @@ -1,5 +1,5 @@ config POLARFIRE_SOC_SYS_CTRL - tristate "POLARFIRE_SOC_SYS_CTRL" + tristate "Microchip PolarFire SoC (MPFS) system controller support" depends on POLARFIRE_SOC_MAILBOX help This driver adds support for the PolarFire SoC (MPFS) system controller. diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c index 092f6ab09acf..6efe36aeb48e 100644 --- a/drivers/soc/qcom/rpmhpd.c +++ b/drivers/soc/qcom/rpmhpd.c @@ -195,7 +195,6 @@ static struct rpmhpd *sa8540p_rpmhpds[] = { [SC8280XP_CX] = &cx, [SC8280XP_CX_AO] = &cx_ao, [SC8280XP_EBI] = &ebi, - [SC8280XP_GFX] = &gfx, [SC8280XP_LCX] = &lcx, [SC8280XP_LMX] = &lmx, [SC8280XP_MMCX] = &mmcx, @@ -492,12 +491,15 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner) unsigned int active_corner, sleep_corner; unsigned int this_active_corner = 0, this_sleep_corner = 0; unsigned int peer_active_corner = 0, peer_sleep_corner = 0; + unsigned int peer_enabled_corner; to_active_sleep(pd, corner, &this_active_corner, &this_sleep_corner); - if (peer && peer->enabled) - to_active_sleep(peer, peer->corner, &peer_active_corner, + if (peer && peer->enabled) { + peer_enabled_corner = max(peer->corner, peer->enable_corner); + to_active_sleep(peer, peer_enabled_corner, &peer_active_corner, &peer_sleep_corner); + } active_corner = max(this_active_corner, peer_active_corner); diff --git a/drivers/soc/renesas/r8a77980-sysc.c b/drivers/soc/renesas/r8a77980-sysc.c index 39ca84a67daa..621e411fc999 100644 --- a/drivers/soc/renesas/r8a77980-sysc.c +++ b/drivers/soc/renesas/r8a77980-sysc.c @@ -25,7 +25,8 @@ static const struct rcar_sysc_area r8a77980_areas[] __initconst = { PD_CPU_NOCR }, { "ca53-cpu3", 0x200, 3, R8A77980_PD_CA53_CPU3, R8A77980_PD_CA53_SCU, PD_CPU_NOCR }, - { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON }, + { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON, + PD_CPU_NOCR }, { "a3ir", 0x180, 0, R8A77980_PD_A3IR, R8A77980_PD_ALWAYS_ON }, { "a2ir0", 0x400, 0, R8A77980_PD_A2IR0, R8A77980_PD_A3IR }, { "a2ir1", 0x400, 1, R8A77980_PD_A2IR1, R8A77980_PD_A3IR }, diff --git a/drivers/soc/xilinx/xlnx_event_manager.c b/drivers/soc/xilinx/xlnx_event_manager.c index f9d9b82b562d..8293cc40047f 100644 --- a/drivers/soc/xilinx/xlnx_event_manager.c +++ b/drivers/soc/xilinx/xlnx_event_manager.c @@ -477,7 +477,7 @@ static void xlnx_call_notify_cb_handler(const u32 *payload) } } if (!is_callback_found) - pr_warn("Didn't find any registered callback for 0x%x 0x%x\n", + pr_warn("Unhandled SGI node 0x%x event 0x%x. Expected with Xen hypervisor\n", payload[1], payload[2]); } @@ -555,7 +555,7 @@ static void xlnx_disable_percpu_irq(void *data) static int xlnx_event_init_sgi(struct platform_device *pdev) { int ret = 0; - int cpu = smp_processor_id(); + int cpu; /* * IRQ related structures are used for the following: * for each SGI interrupt ensure its mapped by GIC IRQ domain @@ -592,9 +592,12 @@ static int xlnx_event_init_sgi(struct platform_device *pdev) sgi_fwspec.param[0] = sgi_num; virq_sgi = irq_create_fwspec_mapping(&sgi_fwspec); + cpu = get_cpu(); per_cpu(cpu_number1, cpu) = cpu; ret = request_percpu_irq(virq_sgi, xlnx_event_handler, "xlnx_event_mgmt", &cpu_number1); + put_cpu(); + WARN_ON(ret); if (ret) { irq_dispose_mapping(virq_sgi); diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 137e7315a3cf..b24955910147 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include "spi-bcm-qspi.h" @@ -1221,7 +1221,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, /* non-aligned and very short transfers are handled by MSPI */ if (!IS_ALIGNED((uintptr_t)addr, 4) || !IS_ALIGNED((uintptr_t)buf, 4) || - len < 4) + len < 4 || op->cmd.opcode == SPINOR_OP_RDSFDP) mspi_read = true; if (!has_bspi(qspi) || mspi_read) diff --git a/drivers/spi/spi-hisi-sfc-v3xx.c b/drivers/spi/spi-hisi-sfc-v3xx.c index d3a23b1c2a4c..61bf00dfe9c3 100644 --- a/drivers/spi/spi-hisi-sfc-v3xx.c +++ b/drivers/spi/spi-hisi-sfc-v3xx.c @@ -377,6 +377,11 @@ static const struct spi_controller_mem_ops hisi_sfc_v3xx_mem_ops = { static irqreturn_t hisi_sfc_v3xx_isr(int irq, void *data) { struct hisi_sfc_v3xx_host *host = data; + u32 reg; + + reg = readl(host->regbase + HISI_SFC_V3XX_INT_STAT); + if (!reg) + return IRQ_NONE; hisi_sfc_v3xx_disable_int(host); diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 6e95efb50acb..f9ec8742917a 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -787,17 +787,19 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, len); mtk_spi_setup_packet(master); - cnt = mdata->xfer_len / 4; - iowrite32_rep(mdata->base + SPI_TX_DATA_REG, - trans->tx_buf + mdata->num_xfered, cnt); + if (trans->tx_buf) { + cnt = mdata->xfer_len / 4; + iowrite32_rep(mdata->base + SPI_TX_DATA_REG, + trans->tx_buf + mdata->num_xfered, cnt); - remainder = mdata->xfer_len % 4; - if (remainder > 0) { - reg_val = 0; - memcpy(®_val, - trans->tx_buf + (cnt * 4) + mdata->num_xfered, - remainder); - writel(reg_val, mdata->base + SPI_TX_DATA_REG); + remainder = mdata->xfer_len % 4; + if (remainder > 0) { + reg_val = 0; + memcpy(®_val, + trans->tx_buf + (cnt * 4) + mdata->num_xfered, + remainder); + writel(reg_val, mdata->base + SPI_TX_DATA_REG); + } } mtk_spi_enable_transfer(master); diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index d65f047b6c82..1179a1115137 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -166,10 +166,8 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t) int scr; u8 cdm = 0; u32 speed; - u8 bits_per_word; /* Start with the generic configuration for this device. */ - bits_per_word = spi->bits_per_word; speed = spi->max_speed_hz; /* @@ -177,9 +175,6 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t) * the transfer to overwrite the generic configuration with zeros. */ if (t) { - if (t->bits_per_word) - bits_per_word = t->bits_per_word; - if (t->speed_hz) speed = min(t->speed_hz, spi->max_speed_hz); } diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 51ceaa485724..ec3a4939ee98 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -137,14 +137,14 @@ struct sh_msiof_spi_priv { /* SIFCTR */ #define SIFCTR_TFWM_MASK GENMASK(31, 29) /* Transmit FIFO Watermark */ -#define SIFCTR_TFWM_64 (0 << 29) /* Transfer Request when 64 empty stages */ -#define SIFCTR_TFWM_32 (1 << 29) /* Transfer Request when 32 empty stages */ -#define SIFCTR_TFWM_24 (2 << 29) /* Transfer Request when 24 empty stages */ -#define SIFCTR_TFWM_16 (3 << 29) /* Transfer Request when 16 empty stages */ -#define SIFCTR_TFWM_12 (4 << 29) /* Transfer Request when 12 empty stages */ -#define SIFCTR_TFWM_8 (5 << 29) /* Transfer Request when 8 empty stages */ -#define SIFCTR_TFWM_4 (6 << 29) /* Transfer Request when 4 empty stages */ -#define SIFCTR_TFWM_1 (7 << 29) /* Transfer Request when 1 empty stage */ +#define SIFCTR_TFWM_64 (0UL << 29) /* Transfer Request when 64 empty stages */ +#define SIFCTR_TFWM_32 (1UL << 29) /* Transfer Request when 32 empty stages */ +#define SIFCTR_TFWM_24 (2UL << 29) /* Transfer Request when 24 empty stages */ +#define SIFCTR_TFWM_16 (3UL << 29) /* Transfer Request when 16 empty stages */ +#define SIFCTR_TFWM_12 (4UL << 29) /* Transfer Request when 12 empty stages */ +#define SIFCTR_TFWM_8 (5UL << 29) /* Transfer Request when 8 empty stages */ +#define SIFCTR_TFWM_4 (6UL << 29) /* Transfer Request when 4 empty stages */ +#define SIFCTR_TFWM_1 (7UL << 29) /* Transfer Request when 1 empty stage */ #define SIFCTR_TFUA_MASK GENMASK(26, 20) /* Transmit FIFO Usable Area */ #define SIFCTR_TFUA_SHIFT 20 #define SIFCTR_TFUA(i) ((i) << SIFCTR_TFUA_SHIFT) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 22d227878bc4..19688f333e0b 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1636,6 +1636,10 @@ static int __spi_pump_transfer_message(struct spi_controller *ctlr, pm_runtime_put_noidle(ctlr->dev.parent); dev_err(&ctlr->dev, "Failed to power device: %d\n", ret); + + msg->status = ret; + spi_finalize_current_message(ctlr); + return ret; } } diff --git a/drivers/spmi/spmi-mtk-pmif.c b/drivers/spmi/spmi-mtk-pmif.c index 01e8851e639d..bf8c0d4109b1 100644 --- a/drivers/spmi/spmi-mtk-pmif.c +++ b/drivers/spmi/spmi-mtk-pmif.c @@ -475,7 +475,7 @@ static int mtk_spmi_probe(struct platform_device *pdev) for (i = 0; i < arb->nclks; i++) arb->clks[i].id = pmif_clock_names[i]; - err = devm_clk_bulk_get(&pdev->dev, arb->nclks, arb->clks); + err = clk_bulk_get(&pdev->dev, arb->nclks, arb->clks); if (err) { dev_err(&pdev->dev, "Failed to get clocks: %d\n", err); goto err_put_ctrl; @@ -484,7 +484,7 @@ static int mtk_spmi_probe(struct platform_device *pdev) err = clk_bulk_prepare_enable(arb->nclks, arb->clks); if (err) { dev_err(&pdev->dev, "Failed to enable clocks: %d\n", err); - goto err_put_ctrl; + goto err_put_clks; } ctrl->cmd = pmif_arb_cmd; @@ -510,6 +510,8 @@ static int mtk_spmi_probe(struct platform_device *pdev) err_domain_remove: clk_bulk_disable_unprepare(arb->nclks, arb->clks); +err_put_clks: + clk_bulk_put(arb->nclks, arb->clks); err_put_ctrl: spmi_controller_put(ctrl); return err; @@ -521,6 +523,7 @@ static int mtk_spmi_remove(struct platform_device *pdev) struct pmif *arb = spmi_controller_get_drvdata(ctrl); clk_bulk_disable_unprepare(arb->nclks, arb->clks); + clk_bulk_put(arb->nclks, arb->clks); spmi_controller_remove(ctrl); spmi_controller_put(ctrl); return 0; diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c index 87d36948c610..c6bd86a5335a 100644 --- a/drivers/staging/greybus/light.c +++ b/drivers/staging/greybus/light.c @@ -100,15 +100,15 @@ static struct led_classdev *get_channel_cdev(struct gb_channel *channel) static struct gb_channel *get_channel_from_mode(struct gb_light *light, u32 mode) { - struct gb_channel *channel = NULL; + struct gb_channel *channel; int i; for (i = 0; i < light->channels_count; i++) { channel = &light->channels[i]; - if (channel && channel->mode == mode) - break; + if (channel->mode == mode) + return channel; } - return channel; + return NULL; } static int __gb_lights_flash_intensity_set(struct gb_channel *channel, diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index f177b20f0f2d..ceba63213894 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -608,7 +608,7 @@ static void ad5933_work(struct work_struct *work) struct ad5933_state, work.work); struct iio_dev *indio_dev = i2c_get_clientdata(st->client); __be16 buf[2]; - int val[2]; + u16 val[2]; unsigned char status; int ret; diff --git a/drivers/staging/media/deprecated/saa7146/ttpci/budget-av.c b/drivers/staging/media/deprecated/saa7146/ttpci/budget-av.c index 0c61a2dec221..81fc4835679f 100644 --- a/drivers/staging/media/deprecated/saa7146/ttpci/budget-av.c +++ b/drivers/staging/media/deprecated/saa7146/ttpci/budget-av.c @@ -1462,7 +1462,8 @@ static int budget_av_attach(struct saa7146_dev *dev, struct saa7146_pci_extensio budget_av->has_saa7113 = 1; err = saa7146_vv_init(dev, &vv_data); if (err != 0) { - /* fixme: proper cleanup here */ + ttpci_budget_deinit(&budget_av->budget); + kfree(budget_av); ERR("cannot init vv subsystem\n"); return err; } @@ -1471,9 +1472,10 @@ static int budget_av_attach(struct saa7146_dev *dev, struct saa7146_pci_extensio vv_data.vid_ops.vidioc_s_input = vidioc_s_input; if ((err = saa7146_register_device(&budget_av->vd, dev, "knc1", VFL_TYPE_VIDEO))) { - /* fixme: proper cleanup here */ - ERR("cannot register capture v4l2 device\n"); saa7146_vv_release(dev); + ttpci_budget_deinit(&budget_av->budget); + kfree(budget_av); + ERR("cannot register capture v4l2 device\n"); return err; } diff --git a/drivers/staging/media/imx/imx-media-csc-scaler.c b/drivers/staging/media/imx/imx-media-csc-scaler.c index 1fd39a2fca98..95cca281e8a3 100644 --- a/drivers/staging/media/imx/imx-media-csc-scaler.c +++ b/drivers/staging/media/imx/imx-media-csc-scaler.c @@ -803,6 +803,7 @@ static int ipu_csc_scaler_release(struct file *file) dev_dbg(priv->dev, "Releasing instance %p\n", ctx); + v4l2_ctrl_handler_free(&ctx->ctrl_hdlr); v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); v4l2_fh_del(&ctx->fh); v4l2_fh_exit(&ctx->fh); diff --git a/drivers/staging/media/ipu3/ipu3-v4l2.c b/drivers/staging/media/ipu3/ipu3-v4l2.c index e530767e80a5..55cc44a401bc 100644 --- a/drivers/staging/media/ipu3/ipu3-v4l2.c +++ b/drivers/staging/media/ipu3/ipu3-v4l2.c @@ -1069,6 +1069,11 @@ static int imgu_v4l2_subdev_register(struct imgu_device *imgu, struct imgu_media_pipe *imgu_pipe = &imgu->imgu_pipe[pipe]; /* Initialize subdev media entity */ + imgu_sd->subdev.entity.ops = &imgu_media_ops; + for (i = 0; i < IMGU_NODE_NUM; i++) { + imgu_sd->subdev_pads[i].flags = imgu_pipe->nodes[i].output ? + MEDIA_PAD_FL_SINK : MEDIA_PAD_FL_SOURCE; + } r = media_entity_pads_init(&imgu_sd->subdev.entity, IMGU_NODE_NUM, imgu_sd->subdev_pads); if (r) { @@ -1076,11 +1081,6 @@ static int imgu_v4l2_subdev_register(struct imgu_device *imgu, "failed initialize subdev media entity (%d)\n", r); return r; } - imgu_sd->subdev.entity.ops = &imgu_media_ops; - for (i = 0; i < IMGU_NODE_NUM; i++) { - imgu_sd->subdev_pads[i].flags = imgu_pipe->nodes[i].output ? - MEDIA_PAD_FL_SINK : MEDIA_PAD_FL_SOURCE; - } /* Initialize subdev */ v4l2_subdev_init(&imgu_sd->subdev, &imgu_subdev_ops); @@ -1177,15 +1177,15 @@ static int imgu_v4l2_node_setup(struct imgu_device *imgu, unsigned int pipe, } /* Initialize media entities */ + node->vdev_pad.flags = node->output ? + MEDIA_PAD_FL_SOURCE : MEDIA_PAD_FL_SINK; + vdev->entity.ops = NULL; r = media_entity_pads_init(&vdev->entity, 1, &node->vdev_pad); if (r) { dev_err(dev, "failed initialize media entity (%d)\n", r); mutex_destroy(&node->lock); return r; } - node->vdev_pad.flags = node->output ? - MEDIA_PAD_FL_SOURCE : MEDIA_PAD_FL_SINK; - vdev->entity.ops = NULL; /* Initialize vbq */ vbq->type = node->vdev_fmt.type; diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h index 93a2196006f7..cb99610f3e12 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus.h +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h @@ -109,6 +109,11 @@ struct cedrus_buffer { unsigned int position; enum cedrus_h264_pic_type pic_type; } h264; + struct { + void *mv_col_buf; + dma_addr_t mv_col_buf_dma; + ssize_t mv_col_buf_size; + } h265; } codec; }; @@ -142,10 +147,6 @@ struct cedrus_ctx { ssize_t intra_pred_buf_size; } h264; struct { - void *mv_col_buf; - dma_addr_t mv_col_buf_addr; - ssize_t mv_col_buf_size; - ssize_t mv_col_buf_unit_size; void *neighbor_info_buf; dma_addr_t neighbor_info_buf_addr; void *entry_points_buf; diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c index 625f77a8c5bd..9f13c942a806 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c @@ -90,12 +90,13 @@ static void cedrus_h265_sram_write_data(struct cedrus_dev *dev, void *data, } static inline dma_addr_t -cedrus_h265_frame_info_mv_col_buf_addr(struct cedrus_ctx *ctx, - unsigned int index, unsigned int field) +cedrus_h265_frame_info_mv_col_buf_addr(struct vb2_buffer *buf, + unsigned int field) { - return ctx->codec.h265.mv_col_buf_addr + index * - ctx->codec.h265.mv_col_buf_unit_size + - field * ctx->codec.h265.mv_col_buf_unit_size / 2; + struct cedrus_buffer *cedrus_buf = vb2_to_cedrus_buffer(buf); + + return cedrus_buf->codec.h265.mv_col_buf_dma + + field * cedrus_buf->codec.h265.mv_col_buf_size / 2; } static void cedrus_h265_frame_info_write_single(struct cedrus_ctx *ctx, @@ -108,9 +109,8 @@ static void cedrus_h265_frame_info_write_single(struct cedrus_ctx *ctx, dma_addr_t dst_luma_addr = cedrus_dst_buf_addr(ctx, buf, 0); dma_addr_t dst_chroma_addr = cedrus_dst_buf_addr(ctx, buf, 1); dma_addr_t mv_col_buf_addr[2] = { - cedrus_h265_frame_info_mv_col_buf_addr(ctx, buf->index, 0), - cedrus_h265_frame_info_mv_col_buf_addr(ctx, buf->index, - field_pic ? 1 : 0) + cedrus_h265_frame_info_mv_col_buf_addr(buf, 0), + cedrus_h265_frame_info_mv_col_buf_addr(buf, field_pic ? 1 : 0) }; u32 offset = VE_DEC_H265_SRAM_OFFSET_FRAME_INFO + VE_DEC_H265_SRAM_OFFSET_FRAME_INFO_UNIT * index; @@ -412,12 +412,13 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) unsigned int width_in_ctb_luma, ctb_size_luma; unsigned int log2_max_luma_coding_block_size; unsigned int ctb_addr_x, ctb_addr_y; + struct cedrus_buffer *cedrus_buf; dma_addr_t src_buf_addr; - dma_addr_t src_buf_end_addr; u32 chroma_log2_weight_denom; u32 num_entry_point_offsets; u32 output_pic_list_index; u32 pic_order_cnt[2]; + size_t slice_bytes; u8 padding; int count; u32 reg; @@ -428,6 +429,8 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) decode_params = run->h265.decode_params; pred_weight_table = &slice_params->pred_weight_table; num_entry_point_offsets = slice_params->num_entry_point_offsets; + cedrus_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf); + slice_bytes = vb2_get_plane_payload(&run->src->vb2_buf, 0); /* * If entry points offsets are present, we should get them @@ -445,31 +448,25 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) DIV_ROUND_UP(sps->pic_width_in_luma_samples, ctb_size_luma); /* MV column buffer size and allocation. */ - if (!ctx->codec.h265.mv_col_buf_size) { - unsigned int num_buffers = - run->dst->vb2_buf.vb2_queue->num_buffers; - + if (!cedrus_buf->codec.h265.mv_col_buf_size) { /* * Each CTB requires a MV col buffer with a specific unit size. * Since the address is given with missing lsb bits, 1 KiB is * added to each buffer to ensure proper alignment. */ - ctx->codec.h265.mv_col_buf_unit_size = + cedrus_buf->codec.h265.mv_col_buf_size = DIV_ROUND_UP(ctx->src_fmt.width, ctb_size_luma) * DIV_ROUND_UP(ctx->src_fmt.height, ctb_size_luma) * CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE + SZ_1K; - ctx->codec.h265.mv_col_buf_size = num_buffers * - ctx->codec.h265.mv_col_buf_unit_size; - /* Buffer is never accessed by CPU, so we can skip kernel mapping. */ - ctx->codec.h265.mv_col_buf = + cedrus_buf->codec.h265.mv_col_buf = dma_alloc_attrs(dev->dev, - ctx->codec.h265.mv_col_buf_size, - &ctx->codec.h265.mv_col_buf_addr, + cedrus_buf->codec.h265.mv_col_buf_size, + &cedrus_buf->codec.h265.mv_col_buf_dma, GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); - if (!ctx->codec.h265.mv_col_buf) { - ctx->codec.h265.mv_col_buf_size = 0; + if (!cedrus_buf->codec.h265.mv_col_buf) { + cedrus_buf->codec.h265.mv_col_buf_size = 0; return -ENOMEM; } } @@ -481,7 +478,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) cedrus_write(dev, VE_DEC_H265_BITS_OFFSET, 0); - reg = slice_params->bit_size; + reg = slice_bytes * 8; cedrus_write(dev, VE_DEC_H265_BITS_LEN, reg); /* Source beginning and end addresses. */ @@ -495,10 +492,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) cedrus_write(dev, VE_DEC_H265_BITS_ADDR, reg); - src_buf_end_addr = src_buf_addr + - DIV_ROUND_UP(slice_params->bit_size, 8); - - reg = VE_DEC_H265_BITS_END_ADDR_BASE(src_buf_end_addr); + reg = VE_DEC_H265_BITS_END_ADDR_BASE(src_buf_addr + slice_bytes); cedrus_write(dev, VE_DEC_H265_BITS_END_ADDR, reg); /* Coding tree block address */ @@ -816,9 +810,6 @@ static int cedrus_h265_start(struct cedrus_ctx *ctx) { struct cedrus_dev *dev = ctx->dev; - /* The buffer size is calculated at setup time. */ - ctx->codec.h265.mv_col_buf_size = 0; - /* Buffer is never accessed by CPU, so we can skip kernel mapping. */ ctx->codec.h265.neighbor_info_buf = dma_alloc_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE, @@ -845,14 +836,24 @@ static int cedrus_h265_start(struct cedrus_ctx *ctx) static void cedrus_h265_stop(struct cedrus_ctx *ctx) { struct cedrus_dev *dev = ctx->dev; + struct cedrus_buffer *buf; + struct vb2_queue *vq; + unsigned int i; - if (ctx->codec.h265.mv_col_buf_size > 0) { - dma_free_attrs(dev->dev, ctx->codec.h265.mv_col_buf_size, - ctx->codec.h265.mv_col_buf, - ctx->codec.h265.mv_col_buf_addr, - DMA_ATTR_NO_KERNEL_MAPPING); + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); - ctx->codec.h265.mv_col_buf_size = 0; + for (i = 0; i < vq->num_buffers; i++) { + buf = vb2_to_cedrus_buffer(vb2_get_buffer(vq, i)); + + if (buf->codec.h265.mv_col_buf_size > 0) { + dma_free_attrs(dev->dev, + buf->codec.h265.mv_col_buf_size, + buf->codec.h265.mv_col_buf, + buf->codec.h265.mv_col_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + + buf->codec.h265.mv_col_buf_size = 0; + } } dma_free_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE, diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c index cb921c94996a..90eb4c5936f3 100644 --- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c +++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c @@ -938,8 +938,9 @@ static int create_component(struct vchiq_mmal_instance *instance, /* build component create message */ m.h.type = MMAL_MSG_TYPE_COMPONENT_CREATE; m.u.component_create.client_component = component->client_component; - strncpy(m.u.component_create.name, name, - sizeof(m.u.component_create.name)); + strscpy_pad(m.u.component_create.name, name, + sizeof(m.u.component_create.name)); + m.u.component_create.pid = 0; ret = send_synchronous_mmal_msg(instance, &m, sizeof(m.u.component_create), diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 301fe376a120..13558cbd9b82 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -147,7 +147,6 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd) struct se_session *se_sess = se_cmd->se_sess; struct se_node_acl *nacl = se_sess->se_node_acl; struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; - unsigned long flags; rcu_read_lock(); deve = target_nacl_find_deve(nacl, se_cmd->orig_fe_lun); @@ -178,10 +177,6 @@ out_unlock: se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev); se_tmr->tmr_dev = rcu_dereference_raw(se_lun->lun_se_dev); - spin_lock_irqsave(&se_tmr->tmr_dev->se_tmr_lock, flags); - list_add_tail(&se_tmr->tmr_list, &se_tmr->tmr_dev->dev_tmr_list); - spin_unlock_irqrestore(&se_tmr->tmr_dev->se_tmr_lock, flags); - return 0; } EXPORT_SYMBOL(transport_lookup_tmr_lun); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 69a4c9581e80..7aec34c09066 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -910,12 +910,15 @@ new_bio: return 0; fail: - if (bio) - bio_put(bio); + if (bio) { + bio_uninit(bio); + kfree(bio); + } while (req->bio) { bio = req->bio; req->bio = bio->bi_next; - bio_put(bio); + bio_uninit(bio); + kfree(bio); } req->biotail = NULL; return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 0686882bcbda..fb93d74c5d0b 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3627,6 +3627,10 @@ int transport_generic_handle_tmr( unsigned long flags; bool aborted = false; + spin_lock_irqsave(&cmd->se_dev->se_tmr_lock, flags); + list_add_tail(&cmd->se_tmr_req->tmr_list, &cmd->se_dev->dev_tmr_list); + spin_unlock_irqrestore(&cmd->se_dev->se_tmr_lock, flags); + spin_lock_irqsave(&cmd->t_state_lock, flags); if (cmd->transport_state & CMD_T_ABORTED) { aborted = true; diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index 4b1092127694..1892e49a8e6a 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -90,13 +90,14 @@ static int optee_register_device(const uuid_t *device_uuid, u32 func) if (rc) { pr_err("device registration failed, err: %d\n", rc); put_device(&optee_device->dev); + return rc; } if (func == PTA_CMD_GET_DEVICES_SUPP) device_create_file(&optee_device->dev, &dev_attr_need_supplicant); - return rc; + return 0; } static int __optee_enumerate_devices(u32 func) diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index a0640f762dc5..750dab3f259e 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -34,7 +35,9 @@ #include #include #include +#include #include +#include #include #include @@ -338,6 +341,52 @@ static void init_hfi_instance(struct hfi_instance *hfi_instance) hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size; } +/* Caller must hold hfi_instance_lock. */ +static void hfi_enable(void) +{ + u64 msr_val; + + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); +} + +static void hfi_set_hw_table(struct hfi_instance *hfi_instance) +{ + phys_addr_t hw_table_pa; + u64 msr_val; + + hw_table_pa = virt_to_phys(hfi_instance->hw_table); + msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); +} + +/* Caller must hold hfi_instance_lock. */ +static void hfi_disable(void) +{ + u64 msr_val; + int i; + + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + + /* + * Wait for hardware to acknowledge the disabling of HFI. Some + * processors may not do it. Wait for ~2ms. This is a reasonable + * time for hardware to complete any pending actions on the HFI + * memory. + */ + for (i = 0; i < 2000; i++) { + rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); + if (msr_val & PACKAGE_THERM_STATUS_HFI_UPDATED) + break; + + udelay(1); + cpu_relax(); + } +} + /** * intel_hfi_online() - Enable HFI on @cpu * @cpu: CPU in which the HFI will be enabled @@ -355,8 +404,6 @@ void intel_hfi_online(unsigned int cpu) { struct hfi_instance *hfi_instance; struct hfi_cpu_info *info; - phys_addr_t hw_table_pa; - u64 msr_val; u16 die_id; /* Nothing to do if hfi_instances are missing. */ @@ -394,14 +441,16 @@ void intel_hfi_online(unsigned int cpu) /* * Hardware is programmed with the physical address of the first page * frame of the table. Hence, the allocated memory must be page-aligned. + * + * Some processors do not forget the initial address of the HFI table + * even after having been reprogrammed. Keep using the same pages. Do + * not free them. */ hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages, GFP_KERNEL | __GFP_ZERO); if (!hfi_instance->hw_table) goto unlock; - hw_table_pa = virt_to_phys(hfi_instance->hw_table); - /* * Allocate memory to keep a local copy of the table that * hardware generates. @@ -411,16 +460,6 @@ void intel_hfi_online(unsigned int cpu) if (!hfi_instance->local_table) goto free_hw_table; - /* - * Program the address of the feedback table of this die/package. On - * some processors, hardware remembers the old address of the HFI table - * even after having been reprogrammed and re-enabled. Thus, do not free - * the pages allocated for the table or reprogram the hardware with a - * new base address. Namely, program the hardware only once. - */ - msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; - wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); - init_hfi_instance(hfi_instance); INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn); @@ -429,13 +468,8 @@ void intel_hfi_online(unsigned int cpu) cpumask_set_cpu(cpu, hfi_instance->cpus); - /* - * Enable the hardware feedback interface and never disable it. See - * comment on programming the address of the table. - */ - rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); - msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; - wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + hfi_set_hw_table(hfi_instance); + hfi_enable(); unlock: mutex_unlock(&hfi_instance_lock); @@ -475,6 +509,10 @@ void intel_hfi_offline(unsigned int cpu) mutex_lock(&hfi_instance_lock); cpumask_clear_cpu(cpu, hfi_instance->cpus); + + if (!cpumask_weight(hfi_instance->cpus)) + hfi_disable(); + mutex_unlock(&hfi_instance_lock); } @@ -523,6 +561,30 @@ static __init int hfi_parse_features(void) return 0; } +static void hfi_do_enable(void) +{ + /* This code runs only on the boot CPU. */ + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0); + struct hfi_instance *hfi_instance = info->hfi_instance; + + /* No locking needed. There is no concurrency with CPU online. */ + hfi_set_hw_table(hfi_instance); + hfi_enable(); +} + +static int hfi_do_disable(void) +{ + /* No locking needed. There is no concurrency with CPU offline. */ + hfi_disable(); + + return 0; +} + +static struct syscore_ops hfi_pm_ops = { + .resume = hfi_do_enable, + .suspend = hfi_do_disable, +}; + void __init intel_hfi_init(void) { struct hfi_instance *hfi_instance; @@ -554,6 +616,8 @@ void __init intel_hfi_init(void) if (!hfi_updates_wq) goto err_nomem; + register_syscore_ops(&hfi_pm_ops); + return; err_nomem: diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index dca1abe36324..55451ff84652 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -714,6 +714,7 @@ static void exar_pci_remove(struct pci_dev *pcidev) for (i = 0; i < priv->nr; i++) serial8250_unregister_port(priv->line[i]); + /* Ensure that every init quirk is properly torn down */ if (priv->board->exit) priv->board->exit(pcidev); } @@ -728,10 +729,6 @@ static int __maybe_unused exar_suspend(struct device *dev) if (priv->line[i] >= 0) serial8250_suspend_port(priv->line[i]); - /* Ensure that every init quirk is properly torn down */ - if (priv->board->exit) - priv->board->exit(pcidev); - return 0; } diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index c79a29bf4c76..47d601e0e657 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1389,9 +1389,6 @@ static void autoconfig_irq(struct uart_8250_port *up) inb_p(ICP); } - if (uart_console(port)) - console_lock(); - /* forget possible initially masked and pending IRQ */ probe_irq_off(probe_irq_on()); save_mcr = serial8250_in_MCR(up); @@ -1422,9 +1419,6 @@ static void autoconfig_irq(struct uart_8250_port *up) if (port->flags & UPF_FOURPORT) outb_p(save_ICP, ICP); - if (uart_console(port)) - console_unlock(); - port->irq = (irq > 0) ? irq : 0; } diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index c74eaf2552c3..2f0f05259778 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1345,11 +1345,41 @@ static void pl011_start_tx_pio(struct uart_amba_port *uap) } } +static void pl011_rs485_tx_start(struct uart_amba_port *uap) +{ + struct uart_port *port = &uap->port; + u32 cr; + + /* Enable transmitter */ + cr = pl011_read(uap, REG_CR); + cr |= UART011_CR_TXE; + + /* Disable receiver if half-duplex */ + if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) + cr &= ~UART011_CR_RXE; + + if (port->rs485.flags & SER_RS485_RTS_ON_SEND) + cr &= ~UART011_CR_RTS; + else + cr |= UART011_CR_RTS; + + pl011_write(cr, uap, REG_CR); + + if (port->rs485.delay_rts_before_send) + mdelay(port->rs485.delay_rts_before_send); + + uap->rs485_tx_started = true; +} + static void pl011_start_tx(struct uart_port *port) { struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); + if ((uap->port.rs485.flags & SER_RS485_ENABLED) && + !uap->rs485_tx_started) + pl011_rs485_tx_start(uap); + if (!pl011_dma_tx_start(uap)) pl011_start_tx_pio(uap); } @@ -1431,42 +1461,12 @@ static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c, return true; } -static void pl011_rs485_tx_start(struct uart_amba_port *uap) -{ - struct uart_port *port = &uap->port; - u32 cr; - - /* Enable transmitter */ - cr = pl011_read(uap, REG_CR); - cr |= UART011_CR_TXE; - - /* Disable receiver if half-duplex */ - if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - cr &= ~UART011_CR_RXE; - - if (port->rs485.flags & SER_RS485_RTS_ON_SEND) - cr &= ~UART011_CR_RTS; - else - cr |= UART011_CR_RTS; - - pl011_write(cr, uap, REG_CR); - - if (port->rs485.delay_rts_before_send) - mdelay(port->rs485.delay_rts_before_send); - - uap->rs485_tx_started = true; -} - /* Returns true if tx interrupts have to be (kept) enabled */ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) { struct circ_buf *xmit = &uap->port.state->xmit; int count = uap->fifosize >> 1; - if ((uap->port.rs485.flags & SER_RS485_ENABLED) && - !uap->rs485_tx_started) - pl011_rs485_tx_start(uap); - if (uap->port.x_char) { if (!pl011_tx_char(uap, uap->port.x_char, from_irq)) return true; diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index c5a9b89c4d31..f94c78263868 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2213,9 +2213,12 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, lpuart32_write(&sport->port, bd, UARTBAUD); lpuart32_serial_setbrg(sport, baud); - lpuart32_write(&sport->port, modem, UARTMODIR); - lpuart32_write(&sport->port, ctrl, UARTCTRL); + /* disable CTS before enabling UARTCTRL_TE to avoid pending idle preamble */ + lpuart32_write(&sport->port, modem & ~UARTMODIR_TXCTSE, UARTMODIR); /* restore control register */ + lpuart32_write(&sport->port, ctrl, UARTCTRL); + /* re-enable the CTS if needed */ + lpuart32_write(&sport->port, modem, UARTMODIR); if ((ctrl & (UARTCTRL_PE | UARTCTRL_M)) == UARTCTRL_PE) sport->is_cs7 = true; diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index f8962a3d4421..573bf7e9b797 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -496,8 +496,7 @@ static void imx_uart_stop_tx(struct uart_port *port) } } -/* called with port.lock taken and irqs off */ -static void imx_uart_stop_rx(struct uart_port *port) +static void imx_uart_stop_rx_with_loopback_ctrl(struct uart_port *port, bool loopback) { struct imx_port *sport = (struct imx_port *)port; u32 ucr1, ucr2, ucr4, uts; @@ -519,7 +518,7 @@ static void imx_uart_stop_rx(struct uart_port *port) /* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */ if (port->rs485.flags & SER_RS485_ENABLED && port->rs485.flags & SER_RS485_RTS_ON_SEND && - sport->have_rtscts && !sport->have_rtsgpio) { + sport->have_rtscts && !sport->have_rtsgpio && loopback) { uts = imx_uart_readl(sport, imx_uart_uts_reg(sport)); uts |= UTS_LOOP; imx_uart_writel(sport, uts, imx_uart_uts_reg(sport)); @@ -531,6 +530,16 @@ static void imx_uart_stop_rx(struct uart_port *port) imx_uart_writel(sport, ucr2, UCR2); } +/* called with port.lock taken and irqs off */ +static void imx_uart_stop_rx(struct uart_port *port) +{ + /* + * Stop RX and enable loopback in order to make sure RS485 bus + * is not blocked. Se comment in imx_uart_probe(). + */ + imx_uart_stop_rx_with_loopback_ctrl(port, true); +} + /* called with port.lock taken and irqs off */ static void imx_uart_enable_ms(struct uart_port *port) { @@ -719,8 +728,13 @@ static void imx_uart_start_tx(struct uart_port *port) imx_uart_rts_inactive(sport, &ucr2); imx_uart_writel(sport, ucr2, UCR2); + /* + * Since we are about to transmit we can not stop RX + * with loopback enabled because that will make our + * transmitted data being just looped to RX. + */ if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - imx_uart_stop_rx(port); + imx_uart_stop_rx_with_loopback_ctrl(port, false); sport->tx_state = WAIT_AFTER_RTS; diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 338cb19dec23..d409ef388721 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -237,6 +237,14 @@ #define MAX310x_REV_MASK (0xf8) #define MAX310X_WRITE_BIT 0x80 +/* Port startup definitions */ +#define MAX310X_PORT_STARTUP_WAIT_RETRIES 20 /* Number of retries */ +#define MAX310X_PORT_STARTUP_WAIT_DELAY_MS 10 /* Delay between retries */ + +/* Crystal-related definitions */ +#define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */ +#define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */ + /* MAX3107 specific */ #define MAX3107_REV_ID (0xa0) @@ -583,7 +591,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr) return 1; } -static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, +static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, unsigned long freq, bool xtal) { unsigned int div, clksrc, pllcfg = 0; @@ -641,12 +649,20 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, /* Wait for crystal */ if (xtal) { - unsigned int val; - msleep(10); - regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); - if (!(val & MAX310X_STS_CLKREADY_BIT)) { - dev_warn(dev, "clock is not stable yet\n"); - } + bool stable = false; + unsigned int try = 0, val = 0; + + do { + msleep(MAX310X_XTAL_WAIT_DELAY_MS); + regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); + + if (val & MAX310X_STS_CLKREADY_BIT) + stable = true; + } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES)); + + if (!stable) + return dev_err_probe(dev, -EAGAIN, + "clock is not stable\n"); } return bestfreq; @@ -1274,7 +1290,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty { int i, ret, fmin, fmax, freq; struct max310x_port *s; - u32 uartclk = 0; + s32 uartclk = 0; bool xtal; for (i = 0; i < devtype->nr; i++) @@ -1337,6 +1353,9 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty goto out_clk; for (i = 0; i < devtype->nr; i++) { + bool started = false; + unsigned int try = 0, val = 0; + /* Reset port */ regmap_write(regmaps[i], MAX310X_MODE2_REG, MAX310X_MODE2_RST_BIT); @@ -1345,13 +1364,27 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty /* Wait for port startup */ do { - regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret); - } while (ret != 0x01); + msleep(MAX310X_PORT_STARTUP_WAIT_DELAY_MS); + regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &val); + + if (val == 0x01) + started = true; + } while (!started && (++try < MAX310X_PORT_STARTUP_WAIT_RETRIES)); + + if (!started) { + ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n"); + goto out_uart; + } regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1); } uartclk = max310x_set_ref_clk(dev, s, freq, xtal); + if (uartclk < 0) { + ret = uartclk; + goto out_uart; + } + dev_dbg(dev, "Reference clock set to %i Hz\n", uartclk); for (i = 0; i < devtype->nr; i++) { @@ -1426,7 +1459,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty if (!ret) return 0; - dev_err(dev, "Unable to reguest IRQ %i\n", irq); + dev_err(dev, "Unable to request IRQ %i\n", irq); out_uart: for (i = 0; i < devtype->nr; i++) { @@ -1600,13 +1633,16 @@ static unsigned short max310x_i2c_slave_addr(unsigned short addr, static int max310x_i2c_probe(struct i2c_client *client) { - const struct max310x_devtype *devtype = - device_get_match_data(&client->dev); + const struct max310x_devtype *devtype; struct i2c_client *port_client; struct regmap *regmaps[4]; unsigned int i; u8 port_addr; + devtype = device_get_match_data(&client->dev); + if (!devtype) + return dev_err_probe(&client->dev, -ENODEV, "Failed to match device\n"); + if (client->addr < devtype->slave_addr.min || client->addr > devtype->slave_addr.max) return dev_err_probe(&client->dev, -EINVAL, diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index aa2c51b84116..589daed19e62 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -996,11 +996,10 @@ static unsigned int s3c24xx_serial_tx_empty(struct uart_port *port) if ((ufstat & info->tx_fifomask) != 0 || (ufstat & info->tx_fifofull)) return 0; - - return 1; + return TIOCSER_TEMT; } - return s3c24xx_serial_txempty_nofifo(port); + return s3c24xx_serial_txempty_nofifo(port) ? TIOCSER_TEMT : 0; } /* no modem control lines */ diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index db33790e6675..e331b57d6d7d 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -301,8 +301,8 @@ /* Misc definitions */ +#define SC16IS7XX_SPI_READ_BIT BIT(7) #define SC16IS7XX_FIFO_SIZE (64) -#define SC16IS7XX_REG_SHIFT 2 #define SC16IS7XX_GPIOS_PER_BANK 4 struct sc16is7xx_devtype { @@ -323,7 +323,8 @@ struct sc16is7xx_one_config { struct sc16is7xx_one { struct uart_port port; - u8 line; + struct regmap *regmap; + struct mutex efr_lock; /* EFR registers access */ struct kthread_work tx_work; struct kthread_work reg_work; struct kthread_delayed_work ms_work; @@ -334,7 +335,6 @@ struct sc16is7xx_one { struct sc16is7xx_port { const struct sc16is7xx_devtype *devtype; - struct regmap *regmap; struct clk *clk; #ifdef CONFIG_GPIOLIB struct gpio_chip gpio; @@ -344,7 +344,6 @@ struct sc16is7xx_port { unsigned char buf[SC16IS7XX_FIFO_SIZE]; struct kthread_worker kworker; struct task_struct *kworker_task; - struct mutex efr_lock; struct sc16is7xx_one p[]; }; @@ -362,48 +361,35 @@ static void sc16is7xx_stop_tx(struct uart_port *port); #define to_sc16is7xx_port(p,e) ((container_of((p), struct sc16is7xx_port, e))) #define to_sc16is7xx_one(p,e) ((container_of((p), struct sc16is7xx_one, e))) -static int sc16is7xx_line(struct uart_port *port) -{ - struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - - return one->line; -} - static u8 sc16is7xx_port_read(struct uart_port *port, u8 reg) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); unsigned int val = 0; - const u8 line = sc16is7xx_line(port); - regmap_read(s->regmap, (reg << SC16IS7XX_REG_SHIFT) | line, &val); + regmap_read(one->regmap, reg, &val); return val; } static void sc16is7xx_port_write(struct uart_port *port, u8 reg, u8 val) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - const u8 line = sc16is7xx_line(port); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - regmap_write(s->regmap, (reg << SC16IS7XX_REG_SHIFT) | line, val); + regmap_write(one->regmap, reg, val); } static void sc16is7xx_fifo_read(struct uart_port *port, unsigned int rxlen) { struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - const u8 line = sc16is7xx_line(port); - u8 addr = (SC16IS7XX_RHR_REG << SC16IS7XX_REG_SHIFT) | line; + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - regcache_cache_bypass(s->regmap, true); - regmap_raw_read(s->regmap, addr, s->buf, rxlen); - regcache_cache_bypass(s->regmap, false); + regmap_noinc_read(one->regmap, SC16IS7XX_RHR_REG, s->buf, rxlen); } static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send) { struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - const u8 line = sc16is7xx_line(port); - u8 addr = (SC16IS7XX_THR_REG << SC16IS7XX_REG_SHIFT) | line; + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); /* * Don't send zero-length data, at least on SPI it confuses the chip @@ -412,32 +398,15 @@ static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send) if (unlikely(!to_send)) return; - regcache_cache_bypass(s->regmap, true); - regmap_raw_write(s->regmap, addr, s->buf, to_send); - regcache_cache_bypass(s->regmap, false); + regmap_noinc_write(one->regmap, SC16IS7XX_THR_REG, s->buf, to_send); } static void sc16is7xx_port_update(struct uart_port *port, u8 reg, u8 mask, u8 val) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - const u8 line = sc16is7xx_line(port); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - regmap_update_bits(s->regmap, (reg << SC16IS7XX_REG_SHIFT) | line, - mask, val); -} - -static int sc16is7xx_alloc_line(void) -{ - int i; - - BUILD_BUG_ON(SC16IS7XX_MAX_DEVS > BITS_PER_LONG); - - for (i = 0; i < SC16IS7XX_MAX_DEVS; i++) - if (!test_and_set_bit(i, &sc16is7xx_lines)) - break; - - return i; + regmap_update_bits(one->regmap, reg, mask, val); } static void sc16is7xx_power(struct uart_port *port, int on) @@ -479,7 +448,7 @@ static const struct sc16is7xx_devtype sc16is762_devtype = { static bool sc16is7xx_regmap_volatile(struct device *dev, unsigned int reg) { - switch (reg >> SC16IS7XX_REG_SHIFT) { + switch (reg) { case SC16IS7XX_RHR_REG: case SC16IS7XX_IIR_REG: case SC16IS7XX_LSR_REG: @@ -498,7 +467,7 @@ static bool sc16is7xx_regmap_volatile(struct device *dev, unsigned int reg) static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg) { - switch (reg >> SC16IS7XX_REG_SHIFT) { + switch (reg) { case SC16IS7XX_RHR_REG: return true; default: @@ -508,9 +477,14 @@ static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg) return false; } +static bool sc16is7xx_regmap_noinc(struct device *dev, unsigned int reg) +{ + return reg == SC16IS7XX_RHR_REG; +} + static int sc16is7xx_set_baud(struct uart_port *port, int baud) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); u8 lcr; u8 prescaler = 0; unsigned long clk = port->uartclk, div = clk / 16 / baud; @@ -533,7 +507,7 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) * because the bulk of the interrupt processing is run as a workqueue * job in thread context. */ - mutex_lock(&s->efr_lock); + mutex_lock(&one->efr_lock); lcr = sc16is7xx_port_read(port, SC16IS7XX_LCR_REG); @@ -542,17 +516,17 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) SC16IS7XX_LCR_CONF_MODE_B); /* Enable enhanced features */ - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(one->regmap, true); sc16is7xx_port_update(port, SC16IS7XX_EFR_REG, SC16IS7XX_EFR_ENABLE_BIT, SC16IS7XX_EFR_ENABLE_BIT); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, false); /* Put LCR back to the normal mode */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); - mutex_unlock(&s->efr_lock); + mutex_unlock(&one->efr_lock); sc16is7xx_port_update(port, SC16IS7XX_MCR_REG, SC16IS7XX_MCR_CLKSEL_BIT, @@ -563,10 +537,10 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) SC16IS7XX_LCR_CONF_MODE_A); /* Write the new divisor */ - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(one->regmap, true); sc16is7xx_port_write(port, SC16IS7XX_DLH_REG, div / 256); sc16is7xx_port_write(port, SC16IS7XX_DLL_REG, div % 256); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, false); /* Put LCR back to the normal mode */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); @@ -667,9 +641,9 @@ static void sc16is7xx_handle_tx(struct uart_port *port) } if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sc16is7xx_stop_tx(port); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); return; } @@ -698,13 +672,15 @@ static void sc16is7xx_handle_tx(struct uart_port *port) sc16is7xx_fifo_write(port, to_send); } - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); if (uart_circ_empty(xmit)) sc16is7xx_stop_tx(port); - spin_unlock_irqrestore(&port->lock, flags); + else + sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT); + uart_port_unlock_irqrestore(port, flags); } static unsigned int sc16is7xx_get_hwmctrl(struct uart_port *port) @@ -722,11 +698,10 @@ static unsigned int sc16is7xx_get_hwmctrl(struct uart_port *port) static void sc16is7xx_update_mlines(struct sc16is7xx_one *one) { struct uart_port *port = &one->port; - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); unsigned long flags; unsigned int status, changed; - lockdep_assert_held_once(&s->efr_lock); + lockdep_assert_held_once(&one->efr_lock); status = sc16is7xx_get_hwmctrl(port); changed = status ^ one->old_mctrl; @@ -736,7 +711,7 @@ static void sc16is7xx_update_mlines(struct sc16is7xx_one *one) one->old_mctrl = status; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); if ((changed & TIOCM_RNG) && (status & TIOCM_RNG)) port->icount.rng++; if (changed & TIOCM_DSR) @@ -747,79 +722,82 @@ static void sc16is7xx_update_mlines(struct sc16is7xx_one *one) uart_handle_cts_change(port, status & TIOCM_CTS); wake_up_interruptible(&port->state->port.delta_msr_wait); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) { + bool rc = true; + unsigned int iir, rxlen; struct uart_port *port = &s->p[portno].port; + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - do { - unsigned int iir, rxlen; - struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); + mutex_lock(&one->efr_lock); - iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG); - if (iir & SC16IS7XX_IIR_NO_INT_BIT) - return false; + iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG); + if (iir & SC16IS7XX_IIR_NO_INT_BIT) { + rc = false; + goto out_port_irq; + } - iir &= SC16IS7XX_IIR_ID_MASK; + iir &= SC16IS7XX_IIR_ID_MASK; - switch (iir) { - case SC16IS7XX_IIR_RDI_SRC: - case SC16IS7XX_IIR_RLSE_SRC: - case SC16IS7XX_IIR_RTOI_SRC: - case SC16IS7XX_IIR_XOFFI_SRC: - rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG); + switch (iir) { + case SC16IS7XX_IIR_RDI_SRC: + case SC16IS7XX_IIR_RLSE_SRC: + case SC16IS7XX_IIR_RTOI_SRC: + case SC16IS7XX_IIR_XOFFI_SRC: + rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG); - /* - * There is a silicon bug that makes the chip report a - * time-out interrupt but no data in the FIFO. This is - * described in errata section 18.1.4. - * - * When this happens, read one byte from the FIFO to - * clear the interrupt. - */ - if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen) - rxlen = 1; + /* + * There is a silicon bug that makes the chip report a + * time-out interrupt but no data in the FIFO. This is + * described in errata section 18.1.4. + * + * When this happens, read one byte from the FIFO to + * clear the interrupt. + */ + if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen) + rxlen = 1; - if (rxlen) - sc16is7xx_handle_rx(port, rxlen, iir); - break; + if (rxlen) + sc16is7xx_handle_rx(port, rxlen, iir); + break; /* CTSRTS interrupt comes only when CTS goes inactive */ - case SC16IS7XX_IIR_CTSRTS_SRC: - case SC16IS7XX_IIR_MSI_SRC: - sc16is7xx_update_mlines(one); - break; - case SC16IS7XX_IIR_THRI_SRC: - sc16is7xx_handle_tx(port); - break; - default: - dev_err_ratelimited(port->dev, - "ttySC%i: Unexpected interrupt: %x", - port->line, iir); - break; - } - } while (0); - return true; + case SC16IS7XX_IIR_CTSRTS_SRC: + case SC16IS7XX_IIR_MSI_SRC: + sc16is7xx_update_mlines(one); + break; + case SC16IS7XX_IIR_THRI_SRC: + sc16is7xx_handle_tx(port); + break; + default: + dev_err_ratelimited(port->dev, + "ttySC%i: Unexpected interrupt: %x", + port->line, iir); + break; + } + +out_port_irq: + mutex_unlock(&one->efr_lock); + + return rc; } static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) { + bool keep_polling; + struct sc16is7xx_port *s = (struct sc16is7xx_port *)dev_id; - mutex_lock(&s->efr_lock); - - while (1) { - bool keep_polling = false; + do { int i; + keep_polling = false; + for (i = 0; i < s->devtype->nr_uart; ++i) keep_polling |= sc16is7xx_port_irq(s, i); - if (!keep_polling) - break; - } - - mutex_unlock(&s->efr_lock); + } while (keep_polling); return IRQ_HANDLED; } @@ -827,20 +805,15 @@ static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) static void sc16is7xx_tx_proc(struct kthread_work *ws) { struct uart_port *port = &(to_sc16is7xx_one(ws, tx_work)->port); - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - unsigned long flags; + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); if ((port->rs485.flags & SER_RS485_ENABLED) && (port->rs485.delay_rts_before_send > 0)) msleep(port->rs485.delay_rts_before_send); - mutex_lock(&s->efr_lock); + mutex_lock(&one->efr_lock); sc16is7xx_handle_tx(port); - mutex_unlock(&s->efr_lock); - - spin_lock_irqsave(&port->lock, flags); - sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT); - spin_unlock_irqrestore(&port->lock, flags); + mutex_unlock(&one->efr_lock); } static void sc16is7xx_reconf_rs485(struct uart_port *port) @@ -851,14 +824,14 @@ static void sc16is7xx_reconf_rs485(struct uart_port *port) struct serial_rs485 *rs485 = &port->rs485; unsigned long irqflags; - spin_lock_irqsave(&port->lock, irqflags); + uart_port_lock_irqsave(port, &irqflags); if (rs485->flags & SER_RS485_ENABLED) { efcr |= SC16IS7XX_EFCR_AUTO_RS485_BIT; if (rs485->flags & SER_RS485_RTS_AFTER_SEND) efcr |= SC16IS7XX_EFCR_RTS_INVERT_BIT; } - spin_unlock_irqrestore(&port->lock, irqflags); + uart_port_unlock_irqrestore(port, irqflags); sc16is7xx_port_update(port, SC16IS7XX_EFCR_REG, mask, efcr); } @@ -869,10 +842,10 @@ static void sc16is7xx_reg_proc(struct kthread_work *ws) struct sc16is7xx_one_config config; unsigned long irqflags; - spin_lock_irqsave(&one->port.lock, irqflags); + uart_port_lock_irqsave(&one->port, &irqflags); config = one->config; memset(&one->config, 0, sizeof(one->config)); - spin_unlock_irqrestore(&one->port.lock, irqflags); + uart_port_unlock_irqrestore(&one->port, irqflags); if (config.flags & SC16IS7XX_RECONF_MD) { u8 mcr = 0; @@ -943,9 +916,9 @@ static void sc16is7xx_ms_proc(struct kthread_work *ws) struct sc16is7xx_port *s = dev_get_drvdata(one->port.dev); if (one->port.state) { - mutex_lock(&s->efr_lock); + mutex_lock(&one->efr_lock); sc16is7xx_update_mlines(one); - mutex_unlock(&s->efr_lock); + mutex_unlock(&one->efr_lock); kthread_queue_delayed_work(&s->kworker, &one->ms_work, HZ); } @@ -978,18 +951,18 @@ static void sc16is7xx_throttle(struct uart_port *port) * value set in MCR register. Stop reading data from RX FIFO so the * AutoRTS feature will de-activate RTS output. */ - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sc16is7xx_ier_clear(port, SC16IS7XX_IER_RDI_BIT); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static void sc16is7xx_unthrottle(struct uart_port *port) { unsigned long flags; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sc16is7xx_ier_set(port, SC16IS7XX_IER_RDI_BIT); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static unsigned int sc16is7xx_tx_empty(struct uart_port *port) @@ -1029,7 +1002,6 @@ static void sc16is7xx_set_termios(struct uart_port *port, struct ktermios *termios, const struct ktermios *old) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); unsigned int lcr, flow = 0; int baud; @@ -1088,13 +1060,13 @@ static void sc16is7xx_set_termios(struct uart_port *port, port->ignore_status_mask |= SC16IS7XX_LSR_BRK_ERROR_MASK; /* As above, claim the mutex while accessing the EFR. */ - mutex_lock(&s->efr_lock); + mutex_lock(&one->efr_lock); sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_CONF_MODE_B); /* Configure flow control */ - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(one->regmap, true); sc16is7xx_port_write(port, SC16IS7XX_XON1_REG, termios->c_cc[VSTART]); sc16is7xx_port_write(port, SC16IS7XX_XOFF1_REG, termios->c_cc[VSTOP]); @@ -1113,12 +1085,12 @@ static void sc16is7xx_set_termios(struct uart_port *port, SC16IS7XX_EFR_REG, SC16IS7XX_EFR_FLOWCTRL_BITS, flow); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, false); /* Update LCR register */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); - mutex_unlock(&s->efr_lock); + mutex_unlock(&one->efr_lock); /* Get baud rate generator configuration */ baud = uart_get_baud_rate(port, termios, old, @@ -1128,7 +1100,7 @@ static void sc16is7xx_set_termios(struct uart_port *port, /* Setup baudrate generator */ baud = sc16is7xx_set_baud(port, baud); - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); /* Update timeout according to new baud rate */ uart_update_timeout(port, termios->c_cflag, baud); @@ -1136,7 +1108,7 @@ static void sc16is7xx_set_termios(struct uart_port *port, if (UART_ENABLE_MS(port, termios->c_cflag)) sc16is7xx_enable_ms(port); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static int sc16is7xx_config_rs485(struct uart_port *port, struct ktermios *termios, @@ -1164,7 +1136,6 @@ static int sc16is7xx_config_rs485(struct uart_port *port, struct ktermios *termi static int sc16is7xx_startup(struct uart_port *port) { struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); unsigned int val; unsigned long flags; @@ -1181,7 +1152,7 @@ static int sc16is7xx_startup(struct uart_port *port) sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_CONF_MODE_B); - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(one->regmap, true); /* Enable write access to enhanced features and internal clock div */ sc16is7xx_port_update(port, SC16IS7XX_EFR_REG, @@ -1199,7 +1170,7 @@ static int sc16is7xx_startup(struct uart_port *port) SC16IS7XX_TCR_RX_RESUME(24) | SC16IS7XX_TCR_RX_HALT(48)); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, false); /* Now, initialize the UART */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_WORD_LEN_8); @@ -1223,9 +1194,9 @@ static int sc16is7xx_startup(struct uart_port *port) sc16is7xx_port_write(port, SC16IS7XX_IER_REG, val); /* Enable modem status polling */ - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sc16is7xx_enable_ms(port); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); return 0; } @@ -1427,7 +1398,8 @@ static int sc16is7xx_setup_gpio_chip(struct sc16is7xx_port *s) /* * Configure ports designated to operate as modem control lines. */ -static int sc16is7xx_setup_mctrl_ports(struct sc16is7xx_port *s) +static int sc16is7xx_setup_mctrl_ports(struct sc16is7xx_port *s, + struct regmap *regmap) { int i; int ret; @@ -1456,8 +1428,8 @@ static int sc16is7xx_setup_mctrl_ports(struct sc16is7xx_port *s) if (s->mctrl_mask) regmap_update_bits( - s->regmap, - SC16IS7XX_IOCONTROL_REG << SC16IS7XX_REG_SHIFT, + regmap, + SC16IS7XX_IOCONTROL_REG, SC16IS7XX_IOCONTROL_MODEM_A_BIT | SC16IS7XX_IOCONTROL_MODEM_B_BIT, s->mctrl_mask); @@ -1472,7 +1444,7 @@ static const struct serial_rs485 sc16is7xx_rs485_supported = { static int sc16is7xx_probe(struct device *dev, const struct sc16is7xx_devtype *devtype, - struct regmap *regmap, int irq) + struct regmap *regmaps[], int irq) { unsigned long freq = 0, *pfreq = dev_get_platdata(dev); unsigned int val; @@ -1480,16 +1452,20 @@ static int sc16is7xx_probe(struct device *dev, int i, ret; struct sc16is7xx_port *s; - if (IS_ERR(regmap)) - return PTR_ERR(regmap); + for (i = 0; i < devtype->nr_uart; i++) + if (IS_ERR(regmaps[i])) + return PTR_ERR(regmaps[i]); /* * This device does not have an identification register that would * tell us if we are really connected to the correct device. * The best we can do is to check if communication is at all possible. + * + * Note: regmap[0] is used in the probe function to access registers + * common to all channels/ports, as it is guaranteed to be present on + * all variants. */ - ret = regmap_read(regmap, - SC16IS7XX_LSR_REG << SC16IS7XX_REG_SHIFT, &val); + ret = regmap_read(regmaps[0], SC16IS7XX_LSR_REG, &val); if (ret < 0) return -EPROBE_DEFER; @@ -1523,10 +1499,8 @@ static int sc16is7xx_probe(struct device *dev, return -EINVAL; } - s->regmap = regmap; s->devtype = devtype; dev_set_drvdata(dev, s); - mutex_init(&s->efr_lock); kthread_init_worker(&s->kworker); s->kworker_task = kthread_run(kthread_worker_fn, &s->kworker, @@ -1538,11 +1512,17 @@ static int sc16is7xx_probe(struct device *dev, sched_set_fifo(s->kworker_task); /* reset device, purging any pending irq / data */ - regmap_write(s->regmap, SC16IS7XX_IOCONTROL_REG << SC16IS7XX_REG_SHIFT, - SC16IS7XX_IOCONTROL_SRESET_BIT); + regmap_write(regmaps[0], SC16IS7XX_IOCONTROL_REG, + SC16IS7XX_IOCONTROL_SRESET_BIT); for (i = 0; i < devtype->nr_uart; ++i) { - s->p[i].line = i; + s->p[i].port.line = find_first_zero_bit(&sc16is7xx_lines, + SC16IS7XX_MAX_DEVS); + if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) { + ret = -ERANGE; + goto out_ports; + } + /* Initialize port data */ s->p[i].port.dev = dev; s->p[i].port.irq = irq; @@ -1562,12 +1542,9 @@ static int sc16is7xx_probe(struct device *dev, s->p[i].port.rs485_supported = sc16is7xx_rs485_supported; s->p[i].port.ops = &sc16is7xx_ops; s->p[i].old_mctrl = 0; - s->p[i].port.line = sc16is7xx_alloc_line(); + s->p[i].regmap = regmaps[i]; - if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) { - ret = -ENOMEM; - goto out_ports; - } + mutex_init(&s->p[i].efr_lock); ret = uart_get_rs485_mode(&s->p[i].port); if (ret) @@ -1584,20 +1561,25 @@ static int sc16is7xx_probe(struct device *dev, kthread_init_work(&s->p[i].tx_work, sc16is7xx_tx_proc); kthread_init_work(&s->p[i].reg_work, sc16is7xx_reg_proc); kthread_init_delayed_work(&s->p[i].ms_work, sc16is7xx_ms_proc); + /* Register port */ - uart_add_one_port(&sc16is7xx_uart, &s->p[i].port); + ret = uart_add_one_port(&sc16is7xx_uart, &s->p[i].port); + if (ret) + goto out_ports; + + set_bit(s->p[i].port.line, &sc16is7xx_lines); /* Enable EFR */ sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_CONF_MODE_B); - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(regmaps[i], true); /* Enable write access to enhanced features */ sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_EFR_REG, SC16IS7XX_EFR_ENABLE_BIT); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(regmaps[i], false); /* Restore access to general registers */ sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG, 0x00); @@ -1617,7 +1599,7 @@ static int sc16is7xx_probe(struct device *dev, s->p[u].irda_mode = true; } - ret = sc16is7xx_setup_mctrl_ports(s); + ret = sc16is7xx_setup_mctrl_ports(s, regmaps[0]); if (ret) goto out_ports; @@ -1652,10 +1634,9 @@ static int sc16is7xx_probe(struct device *dev, #endif out_ports: - for (i--; i >= 0; i--) { - uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port); - clear_bit(s->p[i].port.line, &sc16is7xx_lines); - } + for (i = 0; i < devtype->nr_uart; i++) + if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines)) + uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port); kthread_stop(s->kworker_task); @@ -1677,8 +1658,8 @@ static void sc16is7xx_remove(struct device *dev) for (i = 0; i < s->devtype->nr_uart; i++) { kthread_cancel_delayed_work_sync(&s->p[i].ms_work); - uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port); - clear_bit(s->p[i].port.line, &sc16is7xx_lines); + if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines)) + uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port); sc16is7xx_power(&s->p[i].port, 0); } @@ -1700,19 +1681,42 @@ static const struct of_device_id __maybe_unused sc16is7xx_dt_ids[] = { MODULE_DEVICE_TABLE(of, sc16is7xx_dt_ids); static struct regmap_config regcfg = { - .reg_bits = 7, - .pad_bits = 1, + .reg_bits = 5, + .pad_bits = 3, .val_bits = 8, .cache_type = REGCACHE_RBTREE, .volatile_reg = sc16is7xx_regmap_volatile, .precious_reg = sc16is7xx_regmap_precious, + .writeable_noinc_reg = sc16is7xx_regmap_noinc, + .readable_noinc_reg = sc16is7xx_regmap_noinc, + .max_raw_read = SC16IS7XX_FIFO_SIZE, + .max_raw_write = SC16IS7XX_FIFO_SIZE, + .max_register = SC16IS7XX_EFCR_REG, }; +static const char *sc16is7xx_regmap_name(u8 port_id) +{ + switch (port_id) { + case 0: return "port0"; + case 1: return "port1"; + default: + WARN_ON(true); + return NULL; + } +} + +static unsigned int sc16is7xx_regmap_port_mask(unsigned int port_id) +{ + /* CH1,CH0 are at bits 2:1. */ + return port_id << 1; +} + #ifdef CONFIG_SERIAL_SC16IS7XX_SPI static int sc16is7xx_spi_probe(struct spi_device *spi) { const struct sc16is7xx_devtype *devtype; - struct regmap *regmap; + struct regmap *regmaps[2]; + unsigned int i; int ret; /* Setup SPI bus */ @@ -1737,11 +1741,20 @@ static int sc16is7xx_spi_probe(struct spi_device *spi) devtype = (struct sc16is7xx_devtype *)id_entry->driver_data; } - regcfg.max_register = (0xf << SC16IS7XX_REG_SHIFT) | - (devtype->nr_uart - 1); - regmap = devm_regmap_init_spi(spi, ®cfg); + for (i = 0; i < devtype->nr_uart; i++) { + regcfg.name = sc16is7xx_regmap_name(i); + /* + * If read_flag_mask is 0, the regmap code sets it to a default + * of 0x80. Since we specify our own mask, we must add the READ + * bit ourselves: + */ + regcfg.read_flag_mask = sc16is7xx_regmap_port_mask(i) | + SC16IS7XX_SPI_READ_BIT; + regcfg.write_flag_mask = sc16is7xx_regmap_port_mask(i); + regmaps[i] = devm_regmap_init_spi(spi, ®cfg); + } - return sc16is7xx_probe(&spi->dev, devtype, regmap, spi->irq); + return sc16is7xx_probe(&spi->dev, devtype, regmaps, spi->irq); } static void sc16is7xx_spi_remove(struct spi_device *spi) @@ -1780,7 +1793,8 @@ static int sc16is7xx_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { const struct sc16is7xx_devtype *devtype; - struct regmap *regmap; + struct regmap *regmaps[2]; + unsigned int i; if (i2c->dev.of_node) { devtype = device_get_match_data(&i2c->dev); @@ -1790,11 +1804,14 @@ static int sc16is7xx_i2c_probe(struct i2c_client *i2c, devtype = (struct sc16is7xx_devtype *)id->driver_data; } - regcfg.max_register = (0xf << SC16IS7XX_REG_SHIFT) | - (devtype->nr_uart - 1); - regmap = devm_regmap_init_i2c(i2c, ®cfg); + for (i = 0; i < devtype->nr_uart; i++) { + regcfg.name = sc16is7xx_regmap_name(i); + regcfg.read_flag_mask = sc16is7xx_regmap_port_mask(i); + regcfg.write_flag_mask = sc16is7xx_regmap_port_mask(i); + regmaps[i] = devm_regmap_init_i2c(i2c, ®cfg); + } - return sc16is7xx_probe(&i2c->dev, devtype, regmap, i2c->irq); + return sc16is7xx_probe(&i2c->dev, devtype, regmaps, i2c->irq); } static void sc16is7xx_i2c_remove(struct i2c_client *client) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index f0ed30d0a697..fe3f1d655dfe 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2561,7 +2561,12 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, port->type = PORT_UNKNOWN; flags |= UART_CONFIG_TYPE; } + /* Synchronize with possible boot console. */ + if (uart_console(port)) + console_lock(); port->ops->config_port(port, flags); + if (uart_console(port)) + console_unlock(); } if (port->type != PORT_UNKNOWN) { @@ -2569,6 +2574,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, uart_report_port(drv, port); + /* Synchronize with possible boot console. */ + if (uart_console(port)) + console_lock(); + /* Power up port for set_mctrl() */ uart_change_pm(state, UART_PM_STATE_ON); @@ -2585,6 +2594,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, uart_rs485_config(port); + if (uart_console(port)) + console_unlock(); + /* * If this driver supports console, and it hasn't been * successfully registered yet, try to re-register it. diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 8767c504b95d..9d3b237304ea 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -859,7 +859,7 @@ int tty_mode_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) ret = -EFAULT; return ret; case TIOCSLCKTRMIOS: - if (!capable(CAP_SYS_ADMIN)) + if (!checkpoint_restore_ns_capable(&init_user_ns)) return -EPERM; copy_termios_locked(real_tty, &kterm); if (user_termios_to_kernel_termios(&kterm, @@ -876,7 +876,7 @@ int tty_mode_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) ret = -EFAULT; return ret; case TIOCSLCKTRMIOS: - if (!capable(CAP_SYS_ADMIN)) + if (!checkpoint_restore_ns_capable(&init_user_ns)) return -EPERM; copy_termios_locked(real_tty, &kterm); if (user_termios_to_kernel_termios_1(&kterm, diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 981d2bfcf9a5..48a9ed7c93c9 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -398,7 +398,7 @@ static void vc_uniscr_delete(struct vc_data *vc, unsigned int nr) char32_t *ln = uniscr->lines[vc->state.y]; unsigned int x = vc->state.x, cols = vc->vc_cols; - memcpy(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln)); + memmove(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln)); memset32(&ln[cols - nr], ' ', nr); } } @@ -2515,7 +2515,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) } return; case EScsiignore: - if (c >= 20 && c <= 0x3f) + if (c >= 0x20 && c <= 0x3f) return; vc->vc_state = ESnormal; return; diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 474e94a69b18..f3c25467e571 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6159,7 +6159,6 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba) ufshcd_hold(hba, false); if (!ufshcd_is_clkgating_allowed(hba)) ufshcd_setup_clocks(hba, true); - ufshcd_release(hba); pm_op = hba->is_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM; ufshcd_vops_resume(hba, pm_op); } else { @@ -8333,12 +8332,9 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie) out: pm_runtime_put_sync(hba->dev); - /* - * If we failed to initialize the device or the device is not - * present, turn off the power/clocks etc. - */ + if (ret) - ufshcd_hba_exit(hba); + dev_err(hba->dev, "%s failed: %d\n", __func__, ret); } static const struct attribute_group *ufshcd_driver_groups[] = { diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index ccdd525bd7c8..2b8f98f0707e 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -826,7 +826,11 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep, return; } - if (request->complete) { + /* + * zlp request is appended by driver, needn't call usb_gadget_giveback_request() to notify + * gadget composite driver. + */ + if (request->complete && request->buf != priv_dev->zlp_buf) { spin_unlock(&priv_dev->lock); usb_gadget_giveback_request(&priv_ep->endpoint, request); @@ -2537,11 +2541,11 @@ static int cdns3_gadget_ep_disable(struct usb_ep *ep) while (!list_empty(&priv_ep->wa2_descmiss_req_list)) { priv_req = cdns3_next_priv_request(&priv_ep->wa2_descmiss_req_list); + list_del_init(&priv_req->list); kfree(priv_req->request.buf); cdns3_gadget_ep_free_request(&priv_ep->endpoint, &priv_req->request); - list_del_init(&priv_req->list); --priv_ep->wa2_counter; } diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 7b20d2d5c262..7242591b346b 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -394,7 +394,6 @@ pm_put: return ret; } - /** * cdns_wakeup_irq - interrupt handler for wakeup events * @irq: irq number for cdns3/cdnsp core device diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c index d00ff98dffab..33ba30f79b33 100644 --- a/drivers/usb/cdns3/drd.c +++ b/drivers/usb/cdns3/drd.c @@ -156,7 +156,8 @@ bool cdns_is_device(struct cdns *cdns) */ static void cdns_otg_disable_irq(struct cdns *cdns) { - writel(0, &cdns->otg_irq_regs->ien); + if (cdns->version) + writel(0, &cdns->otg_irq_regs->ien); } /** @@ -418,15 +419,20 @@ int cdns_drd_init(struct cdns *cdns) cdns->otg_regs = (void __iomem *)&cdns->otg_v1_regs->cmd; - if (readl(&cdns->otg_cdnsp_regs->did) == OTG_CDNSP_DID) { + state = readl(&cdns->otg_cdnsp_regs->did); + + if (OTG_CDNSP_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_cdnsp_regs->ien; cdns->version = CDNSP_CONTROLLER_V2; - } else { + } else if (OTG_CDNS3_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_v1_regs->ien; writel(1, &cdns->otg_v1_regs->simulate); cdns->version = CDNS3_CONTROLLER_V1; + } else { + dev_err(cdns->dev, "not supporte DID=0x%08x\n", state); + return -EINVAL; } dev_dbg(cdns->dev, "DRD version v1 (ID: %08x, rev: %08x)\n", @@ -479,7 +485,6 @@ int cdns_drd_exit(struct cdns *cdns) return 0; } - /* Indicate the cdns3 core was power lost before */ bool cdns_power_is_lost(struct cdns *cdns) { diff --git a/drivers/usb/cdns3/drd.h b/drivers/usb/cdns3/drd.h index cbdf94f73ed9..d72370c321d3 100644 --- a/drivers/usb/cdns3/drd.h +++ b/drivers/usb/cdns3/drd.h @@ -79,7 +79,11 @@ struct cdnsp_otg_regs { __le32 susp_timing_ctrl; }; -#define OTG_CDNSP_DID 0x0004034E +/* CDNSP driver supports 0x000403xx Cadence USB controller family. */ +#define OTG_CDNSP_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040300) + +/* CDNS3 driver supports 0x000402xx Cadence USB controller family. */ +#define OTG_CDNS3_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040200) /* * Common registers interface for both CDNS3 and CDNSP version of DRD. diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index 6164fc4c96a4..ceca4d839dfd 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -18,6 +18,11 @@ #include "../host/xhci.h" #include "../host/xhci-plat.h" +/* + * The XECP_PORT_CAP_REG and XECP_AUX_CTRL_REG1 exist only + * in Cadence USB3 dual-role controller, so it can't be used + * with Cadence CDNSP dual-role controller. + */ #define XECP_PORT_CAP_REG 0x8000 #define XECP_AUX_CTRL_REG1 0x8120 @@ -57,6 +62,8 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = { .resume_quirk = xhci_cdns3_resume_quirk, }; +static const struct xhci_plat_priv xhci_plat_cdnsp_xhci; + static int __cdns_host_init(struct cdns *cdns) { struct platform_device *xhci; @@ -81,8 +88,13 @@ static int __cdns_host_init(struct cdns *cdns) goto err1; } - cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, - sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (cdns->version < CDNSP_CONTROLLER_V2) + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + else + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdnsp_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (!cdns->xhci_plat_data) { ret = -ENOMEM; goto err1; diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 1f0951be15ab..fdc1a66b129a 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -485,6 +485,7 @@ out_free_mem: static int service_outstanding_interrupt(struct wdm_device *desc) { int rv = 0; + int used; /* submit read urb only if the device is waiting for it */ if (!desc->resp_count || !--desc->resp_count) @@ -499,7 +500,10 @@ static int service_outstanding_interrupt(struct wdm_device *desc) goto out; } - set_bit(WDM_RESPONDING, &desc->flags); + used = test_and_set_bit(WDM_RESPONDING, &desc->flags); + if (used) + goto out; + spin_unlock_irq(&desc->iuspin); rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 38703781ee2d..1283c427cdf8 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -301,7 +301,7 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi) return ret; } - root = debugfs_create_dir(dev_name(dev), ulpi_root); + root = debugfs_create_dir(dev_name(&ulpi->dev), ulpi_root); debugfs_create_file("regs", 0444, root, ulpi, &ulpi_regs_fops); dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n", diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a44b8ac3b005..0dc1b2f4b44d 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -47,12 +47,18 @@ #define USB_VENDOR_TEXAS_INSTRUMENTS 0x0451 #define USB_PRODUCT_TUSB8041_USB3 0x8140 #define USB_PRODUCT_TUSB8041_USB2 0x8142 -#define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01 -#define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02 +#define USB_VENDOR_MICROCHIP 0x0424 +#define USB_PRODUCT_USB4913 0x4913 +#define USB_PRODUCT_USB4914 0x4914 +#define USB_PRODUCT_USB4915 0x4915 +#define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND BIT(0) +#define HUB_QUIRK_DISABLE_AUTOSUSPEND BIT(1) +#define HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL BIT(2) #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ #define USB_PING_RESPONSE_TIME 400 /* ns */ +#define USB_REDUCE_FRAME_INTR_BINTERVAL 9 /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can @@ -117,7 +123,6 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); #define HUB_DEBOUNCE_STEP 25 #define HUB_DEBOUNCE_STABLE 100 -static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1, @@ -679,14 +684,14 @@ static void kick_hub_wq(struct usb_hub *hub) */ intf = to_usb_interface(hub->intfdev); usb_autopm_get_interface_no_resume(intf); - kref_get(&hub->kref); + hub_get(hub); if (queue_work(hub_wq, &hub->events)) return; /* the work has already been scheduled */ usb_autopm_put_interface_async(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); } void usb_kick_hub_wq(struct usb_device *hdev) @@ -1054,7 +1059,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) goto init2; goto init3; } - kref_get(&hub->kref); + hub_get(hub); /* The superspeed hub except for root hub has to use Hub Depth * value as an offset into the route string to locate the bits @@ -1302,7 +1307,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) device_unlock(&hdev->dev); } - kref_put(&hub->kref, hub_release); + hub_put(hub); } /* Implement the continuations for the delays above */ @@ -1718,6 +1723,16 @@ static void hub_release(struct kref *kref) kfree(hub); } +void hub_get(struct usb_hub *hub) +{ + kref_get(&hub->kref); +} + +void hub_put(struct usb_hub *hub) +{ + kref_put(&hub->kref, hub_release); +} + static unsigned highspeed_hubs; static void hub_disconnect(struct usb_interface *intf) @@ -1766,7 +1781,7 @@ static void hub_disconnect(struct usb_interface *intf) onboard_hub_destroy_pdevs(&hub->onboard_hub_devs); - kref_put(&hub->kref, hub_release); + hub_put(hub); } static bool hub_descriptor_is_sane(struct usb_host_interface *desc) @@ -1905,6 +1920,14 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) usb_autopm_get_interface_no_resume(intf); } + if ((id->driver_info & HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL) && + desc->endpoint[0].desc.bInterval > USB_REDUCE_FRAME_INTR_BINTERVAL) { + desc->endpoint[0].desc.bInterval = + USB_REDUCE_FRAME_INTR_BINTERVAL; + /* Tell the HCD about the interrupt ep's new bInterval */ + usb_set_interface(hdev, 0, 0); + } + if (hub_configure(hub, &desc->endpoint[0].desc) >= 0) { onboard_hub_create_pdevs(hdev, &hub->onboard_hub_devs); @@ -2376,17 +2399,25 @@ static int usb_enumerate_device_otg(struct usb_device *udev) } } else if (desc->bLength == sizeof (struct usb_otg_descriptor)) { - /* Set a_alt_hnp_support for legacy otg device */ - err = usb_control_msg(udev, - usb_sndctrlpipe(udev, 0), - USB_REQ_SET_FEATURE, 0, - USB_DEVICE_A_ALT_HNP_SUPPORT, - 0, NULL, 0, - USB_CTRL_SET_TIMEOUT); - if (err < 0) - dev_err(&udev->dev, - "set a_alt_hnp_support failed: %d\n", - err); + /* + * We are operating on a legacy OTP device + * These should be told that they are operating + * on the wrong port if we have another port that does + * support HNP + */ + if (bus->otg_port != 0) { + /* Set a_alt_hnp_support for legacy otg device */ + err = usb_control_msg(udev, + usb_sndctrlpipe(udev, 0), + USB_REQ_SET_FEATURE, 0, + USB_DEVICE_A_ALT_HNP_SUPPORT, + 0, NULL, 0, + USB_CTRL_SET_TIMEOUT); + if (err < 0) + dev_err(&udev->dev, + "set a_alt_hnp_support failed: %d\n", + err); + } } } #endif @@ -5854,7 +5885,7 @@ out_hdev_lock: /* Balance the stuff in kick_hub_wq() and allow autosuspend */ usb_autopm_put_interface(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); kcov_remote_stop(); } @@ -5887,6 +5918,21 @@ static const struct usb_device_id hub_id_table[] = { .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS, .idProduct = USB_PRODUCT_TUSB8041_USB3, .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR + | USB_DEVICE_ID_MATCH_PRODUCT, + .idVendor = USB_VENDOR_MICROCHIP, + .idProduct = USB_PRODUCT_USB4913, + .driver_info = HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL}, + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR + | USB_DEVICE_ID_MATCH_PRODUCT, + .idVendor = USB_VENDOR_MICROCHIP, + .idProduct = USB_PRODUCT_USB4914, + .driver_info = HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL}, + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR + | USB_DEVICE_ID_MATCH_PRODUCT, + .idVendor = USB_VENDOR_MICROCHIP, + .idProduct = USB_PRODUCT_USB4915, + .driver_info = HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL}, { .match_flags = USB_DEVICE_ID_MATCH_DEV_CLASS, .bDeviceClass = USB_CLASS_HUB}, { .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS, diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index bc66205ca52c..1085c72335d5 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -118,6 +118,8 @@ extern void usb_hub_remove_port_device(struct usb_hub *hub, extern int usb_hub_set_port_power(struct usb_device *hdev, struct usb_hub *hub, int port1, bool set); extern struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev); +extern void hub_get(struct usb_hub *hub); +extern void hub_put(struct usb_hub *hub); extern int hub_port_debounce(struct usb_hub *hub, int port1, bool must_be_connected); extern int usb_clear_port_feature(struct usb_device *hdev, diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 38c1a4f4fdea..e91fa567d08d 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -28,11 +28,22 @@ static ssize_t disable_show(struct device *dev, u16 portstatus, unused; bool disabled; int rc; + struct kernfs_node *kn; + hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) - return rc; + goto out_hub_get; + /* + * Prevent deadlock if another process is concurrently + * trying to unregister hdev. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (!kn) { + rc = -ENODEV; + goto out_autopm; + } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; @@ -42,9 +53,13 @@ static ssize_t disable_show(struct device *dev, usb_hub_port_status(hub, port1, &portstatus, &unused); disabled = !usb_port_is_power_on(hub, portstatus); -out_hdev_lock: + out_hdev_lock: usb_unlock_device(hdev); + sysfs_unbreak_active_protection(kn); + out_autopm: usb_autopm_put_interface(intf); + out_hub_get: + hub_put(hub); if (rc) return rc; @@ -62,15 +77,26 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, int port1 = port_dev->portnum; bool disabled; int rc; + struct kernfs_node *kn; rc = strtobool(buf, &disabled); if (rc) return rc; + hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) - return rc; + goto out_hub_get; + /* + * Prevent deadlock if another process is concurrently + * trying to unregister hdev. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (!kn) { + rc = -ENODEV; + goto out_autopm; + } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; @@ -91,9 +117,13 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, if (!rc) rc = count; -out_hdev_lock: + out_hdev_lock: usb_unlock_device(hdev); + sysfs_unbreak_active_protection(kn); + out_autopm: usb_autopm_put_interface(intf); + out_hub_get: + hub_put(hub); return rc; } @@ -534,7 +564,7 @@ static int match_location(struct usb_device *peer_hdev, void *p) struct usb_hub *peer_hub = usb_hub_to_struct_hub(peer_hdev); struct usb_device *hdev = to_usb_device(port_dev->dev.parent->parent); - if (!peer_hub) + if (!peer_hub || port_dev->connect_type == USB_PORT_NOT_USED) return 0; hcd = bus_to_hcd(hdev->bus); @@ -545,7 +575,8 @@ static int match_location(struct usb_device *peer_hdev, void *p) for (port1 = 1; port1 <= peer_hdev->maxchild; port1++) { peer = peer_hub->ports[port1 - 1]; - if (peer && peer->location == port_dev->location) { + if (peer && peer->connect_type != USB_PORT_NOT_USED && + peer->location == port_dev->location) { link_peers_report(port_dev, peer); return 1; /* done */ } diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index ccf6cd972269..5f1e07341f36 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -1170,14 +1170,24 @@ static ssize_t interface_authorized_store(struct device *dev, { struct usb_interface *intf = to_usb_interface(dev); bool val; + struct kernfs_node *kn; if (strtobool(buf, &val) != 0) return -EINVAL; - if (val) + if (val) { usb_authorize_interface(intf); - else - usb_deauthorize_interface(intf); + } else { + /* + * Prevent deadlock if another process is concurrently + * trying to unregister intf. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (kn) { + usb_deauthorize_interface(intf); + sysfs_unbreak_active_protection(kn); + } + } return count; } diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 85261198c420..4a4c28083ba6 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -729,8 +729,14 @@ struct dwc2_dregs_backup { * struct dwc2_hregs_backup - Holds host registers state before * entering partial power down * @hcfg: Backup of HCFG register + * @hflbaddr: Backup of HFLBADDR register * @haintmsk: Backup of HAINTMSK register + * @hcchar: Backup of HCCHAR register + * @hcsplt: Backup of HCSPLT register * @hcintmsk: Backup of HCINTMSK register + * @hctsiz: Backup of HCTSIZ register + * @hdma: Backup of HCDMA register + * @hcdmab: Backup of HCDMAB register * @hprt0: Backup of HPTR0 register * @hfir: Backup of HFIR register * @hptxfsiz: Backup of HPTXFSIZ register @@ -738,8 +744,14 @@ struct dwc2_dregs_backup { */ struct dwc2_hregs_backup { u32 hcfg; + u32 hflbaddr; u32 haintmsk; + u32 hcchar[MAX_EPS_CHANNELS]; + u32 hcsplt[MAX_EPS_CHANNELS]; u32 hcintmsk[MAX_EPS_CHANNELS]; + u32 hctsiz[MAX_EPS_CHANNELS]; + u32 hcidma[MAX_EPS_CHANNELS]; + u32 hcidmab[MAX_EPS_CHANNELS]; u32 hprt0; u32 hfir; u32 hptxfsiz; @@ -1087,6 +1099,7 @@ struct dwc2_hsotg { bool needs_byte_swap; /* DWC OTG HW Release versions */ +#define DWC2_CORE_REV_4_30a 0x4f54430a #define DWC2_CORE_REV_2_71a 0x4f54271a #define DWC2_CORE_REV_2_72a 0x4f54272a #define DWC2_CORE_REV_2_80a 0x4f54280a @@ -1324,6 +1337,7 @@ int dwc2_backup_global_registers(struct dwc2_hsotg *hsotg); int dwc2_restore_global_registers(struct dwc2_hsotg *hsotg); void dwc2_enable_acg(struct dwc2_hsotg *hsotg); +void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup); /* This function should be called on every hardware interrupt. */ irqreturn_t dwc2_handle_common_intr(int irq, void *dev); diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index 158ede753854..26d752a4c3ca 100644 --- a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -297,7 +297,8 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg) /* Exit gadget mode clock gating. */ if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_gadget_exit_clock_gating(hsotg, 0); } @@ -322,10 +323,11 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg) * @hsotg: Programming view of DWC_otg controller * */ -static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg) +void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup) { u32 glpmcfg; - u32 i = 0; + u32 pcgctl; + u32 dctl; if (hsotg->lx_state != DWC2_L1) { dev_err(hsotg->dev, "Core isn't in DWC2_L1 state\n"); @@ -334,37 +336,57 @@ static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg) glpmcfg = dwc2_readl(hsotg, GLPMCFG); if (dwc2_is_device_mode(hsotg)) { - dev_dbg(hsotg->dev, "Exit from L1 state\n"); + dev_dbg(hsotg->dev, "Exit from L1 state, remotewakeup=%d\n", remotewakeup); glpmcfg &= ~GLPMCFG_ENBLSLPM; - glpmcfg &= ~GLPMCFG_HIRD_THRES_EN; + glpmcfg &= ~GLPMCFG_HIRD_THRES_MASK; dwc2_writel(hsotg, glpmcfg, GLPMCFG); - do { - glpmcfg = dwc2_readl(hsotg, GLPMCFG); + pcgctl = dwc2_readl(hsotg, PCGCTL); + pcgctl &= ~PCGCTL_ENBL_SLEEP_GATING; + dwc2_writel(hsotg, pcgctl, PCGCTL); - if (!(glpmcfg & (GLPMCFG_COREL1RES_MASK | - GLPMCFG_L1RESUMEOK | GLPMCFG_SLPSTS))) - break; + glpmcfg = dwc2_readl(hsotg, GLPMCFG); + if (glpmcfg & GLPMCFG_ENBESL) { + glpmcfg |= GLPMCFG_RSTRSLPSTS; + dwc2_writel(hsotg, glpmcfg, GLPMCFG); + } - udelay(1); - } while (++i < 200); + if (remotewakeup) { + if (dwc2_hsotg_wait_bit_set(hsotg, GLPMCFG, GLPMCFG_L1RESUMEOK, 1000)) { + dev_warn(hsotg->dev, "%s: timeout GLPMCFG_L1RESUMEOK\n", __func__); + goto fail; + return; + } - if (i == 200) { - dev_err(hsotg->dev, "Failed to exit L1 sleep state in 200us.\n"); + dctl = dwc2_readl(hsotg, DCTL); + dctl |= DCTL_RMTWKUPSIG; + dwc2_writel(hsotg, dctl, DCTL); + + if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, GINTSTS_WKUPINT, 1000)) { + dev_warn(hsotg->dev, "%s: timeout GINTSTS_WKUPINT\n", __func__); + goto fail; + return; + } + } + + glpmcfg = dwc2_readl(hsotg, GLPMCFG); + if (glpmcfg & GLPMCFG_COREL1RES_MASK || glpmcfg & GLPMCFG_SLPSTS || + glpmcfg & GLPMCFG_L1RESUMEOK) { + goto fail; return; } - dwc2_gadget_init_lpm(hsotg); + + /* Inform gadget to exit from L1 */ + call_gadget(hsotg, resume); + /* Change to L0 state */ + hsotg->lx_state = DWC2_L0; + hsotg->bus_suspended = false; +fail: dwc2_gadget_init_lpm(hsotg); } else { /* TODO */ dev_err(hsotg->dev, "Host side LPM is not supported.\n"); return; } - - /* Change to L0 state */ - hsotg->lx_state = DWC2_L0; - - /* Inform gadget to exit from L1 */ - call_gadget(hsotg, resume); } /* @@ -385,7 +407,7 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) dev_dbg(hsotg->dev, "%s lxstate = %d\n", __func__, hsotg->lx_state); if (hsotg->lx_state == DWC2_L1) { - dwc2_wakeup_from_lpm_l1(hsotg); + dwc2_wakeup_from_lpm_l1(hsotg, false); return; } @@ -408,7 +430,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) /* Exit gadget mode clock gating. */ if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_gadget_exit_clock_gating(hsotg, 0); } else { /* Change to L0 state */ @@ -425,7 +448,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) } if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_host_exit_clock_gating(hsotg, 1); /* diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 7497d7e93548..da37aee22b72 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1420,6 +1420,10 @@ static int dwc2_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req, ep->name, req, req->length, req->buf, req->no_interrupt, req->zero, req->short_not_ok); + if (hs->lx_state == DWC2_L1) { + dwc2_wakeup_from_lpm_l1(hs, true); + } + /* Prevent new request submission when controller is suspended */ if (hs->lx_state != DWC2_L0) { dev_dbg(hs->dev, "%s: submit request only in active state\n", @@ -3763,6 +3767,12 @@ irq_retry: if (hsotg->in_ppd && hsotg->lx_state == DWC2_L2) dwc2_exit_partial_power_down(hsotg, 0, true); + /* Exit gadget mode clock gating. */ + if (hsotg->params.power_down == + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) + dwc2_gadget_exit_clock_gating(hsotg, 0); + hsotg->lx_state = DWC2_L0; } diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 50a606cbfaba..35d855272030 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -2720,8 +2720,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions( hsotg->available_host_channels--; } qh = list_entry(qh_ptr, struct dwc2_qh, qh_list_entry); - if (dwc2_assign_and_init_hc(hsotg, qh)) + if (dwc2_assign_and_init_hc(hsotg, qh)) { + if (hsotg->params.uframe_sched) + hsotg->available_host_channels++; break; + } /* * Move the QH from the periodic ready schedule to the @@ -2754,8 +2757,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions( hsotg->available_host_channels--; } - if (dwc2_assign_and_init_hc(hsotg, qh)) + if (dwc2_assign_and_init_hc(hsotg, qh)) { + if (hsotg->params.uframe_sched) + hsotg->available_host_channels++; break; + } /* * Move the QH from the non-periodic inactive schedule to the @@ -4166,6 +4172,8 @@ void dwc2_host_complete(struct dwc2_hsotg *hsotg, struct dwc2_qtd *qtd, urb->actual_length); if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) { + if (!hsotg->params.dma_desc_enable) + urb->start_frame = qtd->qh->start_active_frame; urb->error_count = dwc2_hcd_urb_get_error_count(qtd->urb); for (i = 0; i < urb->number_of_packets; ++i) { urb->iso_frame_desc[i].actual_length = @@ -4674,7 +4682,7 @@ static int _dwc2_hcd_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, } if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && - hsotg->bus_suspended) { + hsotg->bus_suspended && !hsotg->params.no_clock_gating) { if (dwc2_is_device_mode(hsotg)) dwc2_gadget_exit_clock_gating(hsotg, 0); else @@ -5438,9 +5446,16 @@ int dwc2_backup_host_registers(struct dwc2_hsotg *hsotg) /* Backup Host regs */ hr = &hsotg->hr_backup; hr->hcfg = dwc2_readl(hsotg, HCFG); + hr->hflbaddr = dwc2_readl(hsotg, HFLBADDR); hr->haintmsk = dwc2_readl(hsotg, HAINTMSK); - for (i = 0; i < hsotg->params.host_channels; ++i) + for (i = 0; i < hsotg->params.host_channels; ++i) { + hr->hcchar[i] = dwc2_readl(hsotg, HCCHAR(i)); + hr->hcsplt[i] = dwc2_readl(hsotg, HCSPLT(i)); hr->hcintmsk[i] = dwc2_readl(hsotg, HCINTMSK(i)); + hr->hctsiz[i] = dwc2_readl(hsotg, HCTSIZ(i)); + hr->hcidma[i] = dwc2_readl(hsotg, HCDMA(i)); + hr->hcidmab[i] = dwc2_readl(hsotg, HCDMAB(i)); + } hr->hprt0 = dwc2_read_hprt0(hsotg); hr->hfir = dwc2_readl(hsotg, HFIR); @@ -5474,10 +5489,17 @@ int dwc2_restore_host_registers(struct dwc2_hsotg *hsotg) hr->valid = false; dwc2_writel(hsotg, hr->hcfg, HCFG); + dwc2_writel(hsotg, hr->hflbaddr, HFLBADDR); dwc2_writel(hsotg, hr->haintmsk, HAINTMSK); - for (i = 0; i < hsotg->params.host_channels; ++i) + for (i = 0; i < hsotg->params.host_channels; ++i) { + dwc2_writel(hsotg, hr->hcchar[i], HCCHAR(i)); + dwc2_writel(hsotg, hr->hcsplt[i], HCSPLT(i)); dwc2_writel(hsotg, hr->hcintmsk[i], HCINTMSK(i)); + dwc2_writel(hsotg, hr->hctsiz[i], HCTSIZ(i)); + dwc2_writel(hsotg, hr->hcidma[i], HCDMA(i)); + dwc2_writel(hsotg, hr->hcidmab[i], HCDMAB(i)); + } dwc2_writel(hsotg, hr->hprt0, HPRT0); dwc2_writel(hsotg, hr->hfir, HFIR); @@ -5642,10 +5664,12 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, dwc2_writel(hsotg, hr->hcfg, HCFG); /* De-assert Wakeup Logic */ - gpwrdn = dwc2_readl(hsotg, GPWRDN); - gpwrdn &= ~GPWRDN_PMUACTV; - dwc2_writel(hsotg, gpwrdn, GPWRDN); - udelay(10); + if (!(rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) { + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn &= ~GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + } hprt0 = hr->hprt0; hprt0 |= HPRT0_PWR; @@ -5670,6 +5694,13 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, hprt0 |= HPRT0_RES; dwc2_writel(hsotg, hprt0, HPRT0); + /* De-assert Wakeup Logic */ + if ((rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) { + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn &= ~GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + } /* Wait for Resume time and then program HPRT again */ mdelay(100); hprt0 &= ~HPRT0_RES; diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c index 6b4d825e97a2..79582b102c7e 100644 --- a/drivers/usb/dwc2/hcd_ddma.c +++ b/drivers/usb/dwc2/hcd_ddma.c @@ -559,7 +559,7 @@ static void dwc2_init_isoc_dma_desc(struct dwc2_hsotg *hsotg, idx = qh->td_last; inc = qh->host_interval; hsotg->frame_number = dwc2_hcd_get_frame_number(hsotg); - cur_idx = dwc2_frame_list_idx(hsotg->frame_number); + cur_idx = idx; next_idx = dwc2_desclist_idx_inc(qh->td_last, inc, qh->dev_speed); /* @@ -866,6 +866,8 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, { struct dwc2_dma_desc *dma_desc; struct dwc2_hcd_iso_packet_desc *frame_desc; + u16 frame_desc_idx; + struct urb *usb_urb = qtd->urb->priv; u16 remain = 0; int rc = 0; @@ -878,8 +880,11 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, DMA_FROM_DEVICE); dma_desc = &qh->desc_list[idx]; + frame_desc_idx = (idx - qtd->isoc_td_first) & (usb_urb->number_of_packets - 1); - frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index_last]; + frame_desc = &qtd->urb->iso_descs[frame_desc_idx]; + if (idx == qtd->isoc_td_first) + usb_urb->start_frame = dwc2_hcd_get_frame_number(hsotg); dma_desc->buf = (u32)(qtd->urb->dma + frame_desc->offset); if (chan->ep_is_in) remain = (dma_desc->status & HOST_DMA_ISOC_NBYTES_MASK) >> @@ -900,7 +905,7 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, frame_desc->status = 0; } - if (++qtd->isoc_frame_index == qtd->urb->packet_count) { + if (++qtd->isoc_frame_index == usb_urb->number_of_packets) { /* * urb->status is not used for isoc transfers here. The * individual frame_desc status are used instead. @@ -1005,11 +1010,11 @@ static void dwc2_complete_isoc_xfer_ddma(struct dwc2_hsotg *hsotg, return; idx = dwc2_desclist_idx_inc(idx, qh->host_interval, chan->speed); - if (!rc) + if (rc == 0) continue; - if (rc == DWC2_CMPL_DONE) - break; + if (rc == DWC2_CMPL_DONE || rc == DWC2_CMPL_STOP) + goto stop_scan; /* rc == DWC2_CMPL_STOP */ diff --git a/drivers/usb/dwc2/hw.h b/drivers/usb/dwc2/hw.h index 13abdd5f6752..12f8c7f86dc9 100644 --- a/drivers/usb/dwc2/hw.h +++ b/drivers/usb/dwc2/hw.h @@ -698,7 +698,7 @@ #define TXSTS_QTOP_TOKEN_MASK (0x3 << 25) #define TXSTS_QTOP_TOKEN_SHIFT 25 #define TXSTS_QTOP_TERMINATE BIT(24) -#define TXSTS_QSPCAVAIL_MASK (0xff << 16) +#define TXSTS_QSPCAVAIL_MASK (0x7f << 16) #define TXSTS_QSPCAVAIL_SHIFT 16 #define TXSTS_FSPCAVAIL_MASK (0xffff << 0) #define TXSTS_FSPCAVAIL_SHIFT 0 diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 30452aed5493..5b74eec719cb 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -371,7 +371,7 @@ static int dwc2_driver_remove(struct platform_device *dev) /* Exit clock gating when driver is removed. */ if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && - hsotg->bus_suspended) { + hsotg->bus_suspended && !hsotg->params.no_clock_gating) { if (dwc2_is_device_mode(hsotg)) dwc2_gadget_exit_clock_gating(hsotg, 0); else diff --git a/drivers/usb/dwc3/dwc3-am62.c b/drivers/usb/dwc3/dwc3-am62.c index 173cf3579c55..ad8a2eadb472 100644 --- a/drivers/usb/dwc3/dwc3-am62.c +++ b/drivers/usb/dwc3/dwc3-am62.c @@ -89,7 +89,7 @@ #define DWC3_AM62_AUTOSUSPEND_DELAY 100 -struct dwc3_data { +struct dwc3_am62 { struct device *dev; void __iomem *usbss; struct clk *usb2_refclk; @@ -115,19 +115,19 @@ static const int dwc3_ti_rate_table[] = { /* in KHZ */ 52000, }; -static inline u32 dwc3_ti_readl(struct dwc3_data *data, u32 offset) +static inline u32 dwc3_ti_readl(struct dwc3_am62 *am62, u32 offset) { - return readl((data->usbss) + offset); + return readl((am62->usbss) + offset); } -static inline void dwc3_ti_writel(struct dwc3_data *data, u32 offset, u32 value) +static inline void dwc3_ti_writel(struct dwc3_am62 *am62, u32 offset, u32 value) { - writel(value, (data->usbss) + offset); + writel(value, (am62->usbss) + offset); } -static int phy_syscon_pll_refclk(struct dwc3_data *data) +static int phy_syscon_pll_refclk(struct dwc3_am62 *am62) { - struct device *dev = data->dev; + struct device *dev = am62->dev; struct device_node *node = dev->of_node; struct of_phandle_args args; struct regmap *syscon; @@ -139,16 +139,16 @@ static int phy_syscon_pll_refclk(struct dwc3_data *data) return PTR_ERR(syscon); } - data->syscon = syscon; + am62->syscon = syscon; ret = of_parse_phandle_with_fixed_args(node, "ti,syscon-phy-pll-refclk", 1, 0, &args); if (ret) return ret; - data->offset = args.args[0]; + am62->offset = args.args[0]; - ret = regmap_update_bits(data->syscon, data->offset, PHY_PLL_REFCLK_MASK, data->rate_code); + ret = regmap_update_bits(am62->syscon, am62->offset, PHY_PLL_REFCLK_MASK, am62->rate_code); if (ret) { dev_err(dev, "failed to set phy pll reference clock rate\n"); return ret; @@ -161,32 +161,32 @@ static int dwc3_ti_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *node = pdev->dev.of_node; - struct dwc3_data *data; + struct dwc3_am62 *am62; int i, ret; unsigned long rate; u32 reg; - data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); - if (!data) + am62 = devm_kzalloc(dev, sizeof(*am62), GFP_KERNEL); + if (!am62) return -ENOMEM; - data->dev = dev; - platform_set_drvdata(pdev, data); + am62->dev = dev; + platform_set_drvdata(pdev, am62); - data->usbss = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(data->usbss)) { + am62->usbss = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(am62->usbss)) { dev_err(dev, "can't map IOMEM resource\n"); - return PTR_ERR(data->usbss); + return PTR_ERR(am62->usbss); } - data->usb2_refclk = devm_clk_get(dev, "ref"); - if (IS_ERR(data->usb2_refclk)) { + am62->usb2_refclk = devm_clk_get(dev, "ref"); + if (IS_ERR(am62->usb2_refclk)) { dev_err(dev, "can't get usb2_refclk\n"); - return PTR_ERR(data->usb2_refclk); + return PTR_ERR(am62->usb2_refclk); } /* Calculate the rate code */ - rate = clk_get_rate(data->usb2_refclk); + rate = clk_get_rate(am62->usb2_refclk); rate /= 1000; // To KHz for (i = 0; i < ARRAY_SIZE(dwc3_ti_rate_table); i++) { if (dwc3_ti_rate_table[i] == rate) @@ -198,20 +198,20 @@ static int dwc3_ti_probe(struct platform_device *pdev) return -EINVAL; } - data->rate_code = i; + am62->rate_code = i; /* Read the syscon property and set the rate code */ - ret = phy_syscon_pll_refclk(data); + ret = phy_syscon_pll_refclk(am62); if (ret) return ret; /* VBUS divider select */ - data->vbus_divider = device_property_read_bool(dev, "ti,vbus-divider"); - reg = dwc3_ti_readl(data, USBSS_PHY_CONFIG); - if (data->vbus_divider) + am62->vbus_divider = device_property_read_bool(dev, "ti,vbus-divider"); + reg = dwc3_ti_readl(am62, USBSS_PHY_CONFIG); + if (am62->vbus_divider) reg |= 1 << USBSS_PHY_VBUS_SEL_SHIFT; - dwc3_ti_writel(data, USBSS_PHY_CONFIG, reg); + dwc3_ti_writel(am62, USBSS_PHY_CONFIG, reg); pm_runtime_set_active(dev); pm_runtime_enable(dev); @@ -219,7 +219,7 @@ static int dwc3_ti_probe(struct platform_device *pdev) * Don't ignore its dependencies with its children */ pm_suspend_ignore_children(dev, false); - clk_prepare_enable(data->usb2_refclk); + clk_prepare_enable(am62->usb2_refclk); pm_runtime_get_noresume(dev); ret = of_platform_populate(node, NULL, NULL, dev); @@ -229,9 +229,9 @@ static int dwc3_ti_probe(struct platform_device *pdev) } /* Set mode valid bit to indicate role is valid */ - reg = dwc3_ti_readl(data, USBSS_MODE_CONTROL); + reg = dwc3_ti_readl(am62, USBSS_MODE_CONTROL); reg |= USBSS_MODE_VALID; - dwc3_ti_writel(data, USBSS_MODE_CONTROL, reg); + dwc3_ti_writel(am62, USBSS_MODE_CONTROL, reg); /* Setting up autosuspend */ pm_runtime_set_autosuspend_delay(dev, DWC3_AM62_AUTOSUSPEND_DELAY); @@ -241,35 +241,27 @@ static int dwc3_ti_probe(struct platform_device *pdev) return 0; err_pm_disable: - clk_disable_unprepare(data->usb2_refclk); + clk_disable_unprepare(am62->usb2_refclk); pm_runtime_disable(dev); pm_runtime_set_suspended(dev); return ret; } -static int dwc3_ti_remove_core(struct device *dev, void *c) -{ - struct platform_device *pdev = to_platform_device(dev); - - platform_device_unregister(pdev); - return 0; -} - static int dwc3_ti_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct dwc3_data *data = platform_get_drvdata(pdev); + struct dwc3_am62 *am62 = platform_get_drvdata(pdev); u32 reg; - device_for_each_child(dev, NULL, dwc3_ti_remove_core); + pm_runtime_get_sync(dev); + of_platform_depopulate(dev); /* Clear mode valid bit */ - reg = dwc3_ti_readl(data, USBSS_MODE_CONTROL); + reg = dwc3_ti_readl(am62, USBSS_MODE_CONTROL); reg &= ~USBSS_MODE_VALID; - dwc3_ti_writel(data, USBSS_MODE_CONTROL, reg); + dwc3_ti_writel(am62, USBSS_MODE_CONTROL, reg); pm_runtime_put_sync(dev); - clk_disable_unprepare(data->usb2_refclk); pm_runtime_disable(dev); pm_runtime_set_suspended(dev); @@ -280,18 +272,18 @@ static int dwc3_ti_remove(struct platform_device *pdev) #ifdef CONFIG_PM static int dwc3_ti_suspend_common(struct device *dev) { - struct dwc3_data *data = dev_get_drvdata(dev); + struct dwc3_am62 *am62 = dev_get_drvdata(dev); - clk_disable_unprepare(data->usb2_refclk); + clk_disable_unprepare(am62->usb2_refclk); return 0; } static int dwc3_ti_resume_common(struct device *dev) { - struct dwc3_data *data = dev_get_drvdata(dev); + struct dwc3_am62 *am62 = dev_get_drvdata(dev); - clk_prepare_enable(data->usb2_refclk); + clk_prepare_enable(am62->usb2_refclk); return 0; } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 1c692c62e519..a4ff8470a46f 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2698,6 +2698,11 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc) int ret; spin_lock_irqsave(&dwc->lock, flags); + if (!dwc->pullups_connected) { + spin_unlock_irqrestore(&dwc->lock, flags); + return 0; + } + dwc->connected = false; /* @@ -4112,6 +4117,13 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc) usb_gadget_set_state(dwc->gadget, USB_STATE_NOTATTACHED); dwc3_ep0_reset_state(dwc); + + /* + * Request PM idle to address condition where usage count is + * already decremented to zero, but waiting for the disconnect + * interrupt to set dwc->connected to FALSE. + */ + pm_request_idle(dwc->dev); } static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) @@ -4820,15 +4832,13 @@ int dwc3_gadget_suspend(struct dwc3 *dwc) unsigned long flags; int ret; - if (!dwc->gadget_driver) - return 0; - ret = dwc3_gadget_soft_disconnect(dwc); if (ret) goto err; spin_lock_irqsave(&dwc->lock, flags); - dwc3_disconnect_gadget(dwc); + if (dwc->gadget_driver) + dwc3_disconnect_gadget(dwc); spin_unlock_irqrestore(&dwc->lock, flags); return 0; diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index d3999a7ae8aa..82f7f86c2057 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -94,6 +94,8 @@ int dwc3_host_init(struct dwc3 *dwc) memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props)); + props[prop_idx++] = PROPERTY_ENTRY_BOOL("xhci-sg-trb-cache-size-quirk"); + if (dwc->usb3_lpm_capable) props[prop_idx++] = PROPERTY_ENTRY_BOOL("usb3-lpm-capable"); diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 2a94e7661bd7..2560be48e11d 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -511,6 +511,19 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, return min(val, 900U) / 8; } +void check_remote_wakeup_config(struct usb_gadget *g, + struct usb_configuration *c) +{ + if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) { + /* Reset the rw bit if gadget is not capable of it */ + if (!g->wakeup_capable && g->ops->set_remote_wakeup) { + WARN(c->cdev, "Clearing wakeup bit for config c.%d\n", + c->bConfigurationValue); + c->bmAttributes &= ~USB_CONFIG_ATT_WAKEUP; + } + } +} + static int config_buf(struct usb_configuration *config, enum usb_device_speed speed, void *buf, u8 type) { @@ -959,6 +972,11 @@ static int set_config(struct usb_composite_dev *cdev, power = min(power, 500U); else power = min(power, 900U); + + if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) + usb_gadget_set_remote_wakeup(gadget, 1); + else + usb_gadget_set_remote_wakeup(gadget, 0); done: if (power <= USB_SELF_POWER_VBUS_MAX_DRAW) usb_gadget_set_selfpowered(gadget); diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index b51ab8096292..08cf5ca5df49 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1411,6 +1411,9 @@ static int configfs_composite_bind(struct usb_gadget *gadget, if (gadget_is_otg(gadget)) c->descriptors = otg_desc; + /* Properly configure the bmAttributes wakeup bit */ + check_remote_wakeup_config(gadget, c); + cfg = container_of(c, struct config_usb_cfg, c); if (!list_empty(&cfg->string_list)) { i = 0; diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 7b9a4cf9b100..d35f30a9cae2 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -544,21 +544,37 @@ static int start_transfer(struct fsg_dev *fsg, struct usb_ep *ep, static bool start_in_transfer(struct fsg_common *common, struct fsg_buffhd *bh) { + int rc; + if (!fsg_is_set(common)) return false; bh->state = BUF_STATE_SENDING; - if (start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq)) + rc = start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq); + if (rc) { bh->state = BUF_STATE_EMPTY; + if (rc == -ESHUTDOWN) { + common->running = 0; + return false; + } + } return true; } static bool start_out_transfer(struct fsg_common *common, struct fsg_buffhd *bh) { + int rc; + if (!fsg_is_set(common)) return false; bh->state = BUF_STATE_RECEIVING; - if (start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq)) + rc = start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq); + if (rc) { bh->state = BUF_STATE_FULL; + if (rc == -ESHUTDOWN) { + common->running = 0; + return false; + } + } return true; } diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index bbb6ff6b11aa..14601a2d2542 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1340,7 +1340,15 @@ parse_ntb: "Parsed NTB with %d frames\n", dgram_counter); to_process -= block_len; - if (to_process != 0) { + + /* + * Windows NCM driver avoids USB ZLPs by adding a 1-byte + * zero pad as needed. + */ + if (to_process == 1 && + (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) { + to_process--; + } else if ((to_process > 0) && (block_len != 0)) { ntb_ptr = (unsigned char *)(ntb_ptr + block_len); goto parse_ntb; } diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index f48c95c0feee..e7e370df4021 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -292,7 +292,9 @@ int usb_ep_queue(struct usb_ep *ep, { int ret = 0; - if (WARN_ON_ONCE(!ep->enabled && ep->address)) { + if (!ep->enabled && ep->address) { + pr_debug("USB gadget: queue request to disabled ep 0x%x (%s)\n", + ep->address, ep->name); ret = -ESHUTDOWN; goto out; } @@ -525,6 +527,33 @@ out: } EXPORT_SYMBOL_GPL(usb_gadget_wakeup); +/** + * usb_gadget_set_remote_wakeup - configures the device remote wakeup feature. + * @gadget:the device being configured for remote wakeup + * @set:value to be configured. + * + * set to one to enable remote wakeup feature and zero to disable it. + * + * returns zero on success, else negative errno. + */ +int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) +{ + int ret = 0; + + if (!gadget->ops->set_remote_wakeup) { + ret = -EOPNOTSUPP; + goto out; + } + + ret = gadget->ops->set_remote_wakeup(gadget, set); + +out: + trace_usb_gadget_set_remote_wakeup(gadget, ret); + + return ret; +} +EXPORT_SYMBOL_GPL(usb_gadget_set_remote_wakeup); + /** * usb_gadget_set_selfpowered - sets the device selfpowered feature. * @gadget:the device being declared as self-powered diff --git a/drivers/usb/gadget/udc/net2272.c b/drivers/usb/gadget/udc/net2272.c index 538c1b9a2883..c42d5aa99e81 100644 --- a/drivers/usb/gadget/udc/net2272.c +++ b/drivers/usb/gadget/udc/net2272.c @@ -2650,7 +2650,7 @@ net2272_plat_probe(struct platform_device *pdev) goto err_req; } - ret = net2272_probe_fin(dev, IRQF_TRIGGER_LOW); + ret = net2272_probe_fin(dev, irqflags); if (ret) goto err_io; diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index a8cadc45c65a..fd7a9535973e 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -3486,8 +3486,8 @@ static void tegra_xudc_device_params_init(struct tegra_xudc *xudc) static int tegra_xudc_phy_get(struct tegra_xudc *xudc) { - int err = 0, usb3; - unsigned int i; + int err = 0, usb3_companion_port; + unsigned int i, j; xudc->utmi_phy = devm_kcalloc(xudc->dev, xudc->soc->num_phys, sizeof(*xudc->utmi_phy), GFP_KERNEL); @@ -3515,7 +3515,7 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc) if (IS_ERR(xudc->utmi_phy[i])) { err = PTR_ERR(xudc->utmi_phy[i]); dev_err_probe(xudc->dev, err, - "failed to get usb2-%d PHY\n", i); + "failed to get PHY for phy-name usb2-%d\n", i); goto clean_up; } else if (xudc->utmi_phy[i]) { /* Get usb-phy, if utmi phy is available */ @@ -3534,19 +3534,30 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc) } /* Get USB3 phy */ - usb3 = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i); - if (usb3 < 0) + usb3_companion_port = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i); + if (usb3_companion_port < 0) continue; - snprintf(phy_name, sizeof(phy_name), "usb3-%d", usb3); - xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name); - if (IS_ERR(xudc->usb3_phy[i])) { - err = PTR_ERR(xudc->usb3_phy[i]); - dev_err_probe(xudc->dev, err, - "failed to get usb3-%d PHY\n", usb3); - goto clean_up; - } else if (xudc->usb3_phy[i]) - dev_dbg(xudc->dev, "usb3-%d PHY registered", usb3); + for (j = 0; j < xudc->soc->num_phys; j++) { + snprintf(phy_name, sizeof(phy_name), "usb3-%d", j); + xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name); + if (IS_ERR(xudc->usb3_phy[i])) { + err = PTR_ERR(xudc->usb3_phy[i]); + dev_err_probe(xudc->dev, err, + "failed to get PHY for phy-name usb3-%d\n", j); + goto clean_up; + } else if (xudc->usb3_phy[i]) { + int usb2_port = + tegra_xusb_padctl_get_port_number(xudc->utmi_phy[i]); + int usb3_port = + tegra_xusb_padctl_get_port_number(xudc->usb3_phy[i]); + if (usb3_port == usb3_companion_port) { + dev_dbg(xudc->dev, "USB2 port %d is paired with USB3 port %d for device mode port %d\n", + usb2_port, usb3_port, i); + break; + } + } + } } return err; diff --git a/drivers/usb/gadget/udc/trace.h b/drivers/usb/gadget/udc/trace.h index abdbcb1bacb0..a5ed26fbc2da 100644 --- a/drivers/usb/gadget/udc/trace.h +++ b/drivers/usb/gadget/udc/trace.h @@ -91,6 +91,11 @@ DEFINE_EVENT(udc_log_gadget, usb_gadget_wakeup, TP_ARGS(g, ret) ); +DEFINE_EVENT(udc_log_gadget, usb_gadget_set_remote_wakeup, + TP_PROTO(struct usb_gadget *g, int ret), + TP_ARGS(g, ret) +); + DEFINE_EVENT(udc_log_gadget, usb_gadget_set_selfpowered, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 807c846fb2d6..f166908aff5c 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -310,6 +310,9 @@ static int xhci_plat_probe(struct platform_device *pdev) "xhci-u2-broken-suspend")) xhci->quirks |= XHCI_U2_BROKEN_SUSPEND; + if (device_property_read_bool(tmpdev, "xhci-sg-trb-cache-size-quirk")) + xhci->quirks |= XHCI_SG_TRB_CACHE_SIZE_QUIRK; + device_property_read_u32(tmpdev, "imod-interval-ns", &xhci->imod_interval); } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 8e248f423a4f..84013853297e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2355,6 +2355,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, /* handle completion code */ switch (trb_comp_code) { case COMP_SUCCESS: + /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */ + if (td->error_mid_td) + break; if (remaining) { frame->status = short_framestatus; if (xhci->quirks & XHCI_TRUST_TX_LENGTH) @@ -2370,9 +2373,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, case COMP_BANDWIDTH_OVERRUN_ERROR: frame->status = -ECOMM; break; - case COMP_ISOCH_BUFFER_OVERRUN: case COMP_BABBLE_DETECTED_ERROR: + sum_trbs_for_length = true; + fallthrough; + case COMP_ISOCH_BUFFER_OVERRUN: frame->status = -EOVERFLOW; + if (ep_trb != td->last_trb) + td->error_mid_td = true; break; case COMP_INCOMPATIBLE_DEVICE_ERROR: case COMP_STALL_ERROR: @@ -2380,8 +2387,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; case COMP_USB_TRANSACTION_ERROR: frame->status = -EPROTO; + sum_trbs_for_length = true; if (ep_trb != td->last_trb) - return 0; + td->error_mid_td = true; break; case COMP_STOPPED: sum_trbs_for_length = true; @@ -2401,6 +2409,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; } + if (td->urb_length_set) + goto finish_td; + if (sum_trbs_for_length) frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) + ep_trb_len - remaining; @@ -2409,6 +2420,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, td->urb->actual_length += frame->actual_length; +finish_td: + /* Don't give back TD yet if we encountered an error mid TD */ + if (td->error_mid_td && ep_trb != td->last_trb) { + xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n"); + td->urb_length_set = true; + return 0; + } + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } @@ -2793,17 +2812,51 @@ static int handle_tx_event(struct xhci_hcd *xhci, } if (!ep_seg) { - if (!ep->skip || - !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { - /* Some host controllers give a spurious - * successful event after a short transfer. - * Ignore it. - */ - if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && - ep_ring->last_td_was_short) { - ep_ring->last_td_was_short = false; - goto cleanup; + + if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { + skip_isoc_td(xhci, td, ep, status); + goto cleanup; + } + + /* + * Some hosts give a spurious success event after a short + * transfer. Ignore it. + */ + if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && + ep_ring->last_td_was_short) { + ep_ring->last_td_was_short = false; + goto cleanup; + } + + /* + * xhci 4.10.2 states isoc endpoints should continue + * processing the next TD if there was an error mid TD. + * So host like NEC don't generate an event for the last + * isoc TRB even if the IOC flag is set. + * xhci 4.9.1 states that if there are errors in mult-TRB + * TDs xHC should generate an error for that TRB, and if xHC + * proceeds to the next TD it should genete an event for + * any TRB with IOC flag on the way. Other host follow this. + * So this event might be for the next TD. + */ + if (td->error_mid_td && + !list_is_last(&td->td_list, &ep_ring->td_list)) { + struct xhci_td *td_next = list_next_entry(td, td_list); + + ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb, + td_next->last_trb, ep_trb_dma, false); + if (ep_seg) { + /* give back previous TD, start handling new */ + xhci_dbg(xhci, "Missing TD completion event after mid TD error\n"); + ep_ring->dequeue = td->last_trb; + ep_ring->deq_seg = td->last_trb_seg; + inc_deq(xhci, ep_ring); + xhci_td_cleanup(xhci, td, ep_ring, td->status); + td = td_next; } + } + + if (!ep_seg) { /* HC is busted, give up! */ xhci_err(xhci, "ERROR Transfer event TRB DMA ptr not " @@ -2815,9 +2868,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep_trb_dma, true); return -ESHUTDOWN; } - - skip_isoc_td(xhci, td, ep, status); - goto cleanup; } if (trb_comp_code == COMP_SHORT_PACKET) ep_ring->last_td_was_short = true; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 6477ee92a517..8f9a7f1ce67f 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1334,6 +1334,8 @@ static int xhci_map_temp_buffer(struct usb_hcd *hcd, struct urb *urb) temp = kzalloc_node(buf_len, GFP_ATOMIC, dev_to_node(hcd->self.sysdev)); + if (!temp) + return -ENOMEM; if (usb_urb_dir_out(urb)) sg_pcopy_to_buffer(urb->sg, urb->num_sgs, diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 682e076ce224..7e782548d397 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1570,6 +1570,7 @@ struct xhci_td { struct xhci_segment *bounce_seg; /* actual_length of the URB has already been set */ bool urb_length_set; + bool error_mid_td; unsigned int num_trbs; }; diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index 32e6d19f7011..a327f8bc5704 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -19,7 +19,9 @@ static struct class *role_class; struct usb_role_switch { struct device dev; struct mutex lock; /* device lock*/ + struct module *module; /* the module this device depends on */ enum usb_role role; + bool registered; /* From descriptor */ struct device *usb2_port; @@ -46,6 +48,9 @@ int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role) if (IS_ERR_OR_NULL(sw)) return 0; + if (!sw->registered) + return -EOPNOTSUPP; + mutex_lock(&sw->lock); ret = sw->set(sw, role); @@ -71,7 +76,7 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw) { enum usb_role role; - if (IS_ERR_OR_NULL(sw)) + if (IS_ERR_OR_NULL(sw) || !sw->registered) return USB_ROLE_NONE; mutex_lock(&sw->lock); @@ -133,7 +138,7 @@ struct usb_role_switch *usb_role_switch_get(struct device *dev) usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -155,7 +160,7 @@ struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *fwnode) sw = fwnode_connection_find_match(fwnode, "usb-role-switch", NULL, usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -170,7 +175,7 @@ EXPORT_SYMBOL_GPL(fwnode_usb_role_switch_get); void usb_role_switch_put(struct usb_role_switch *sw) { if (!IS_ERR_OR_NULL(sw)) { - module_put(sw->dev.parent->driver->owner); + module_put(sw->module); put_device(&sw->dev); } } @@ -187,15 +192,18 @@ struct usb_role_switch * usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) { struct device *dev; + struct usb_role_switch *sw = NULL; if (!fwnode) return NULL; dev = class_find_device_by_fwnode(role_class, fwnode); - if (dev) - WARN_ON(!try_module_get(dev->parent->driver->owner)); + if (dev) { + sw = to_role_switch(dev); + WARN_ON(!try_module_get(sw->module)); + } - return dev ? to_role_switch(dev) : NULL; + return sw; } EXPORT_SYMBOL_GPL(usb_role_switch_find_by_fwnode); @@ -337,6 +345,7 @@ usb_role_switch_register(struct device *parent, sw->set = desc->set; sw->get = desc->get; + sw->module = parent->driver->owner; sw->dev.parent = parent; sw->dev.fwnode = desc->fwnode; sw->dev.class = role_class; @@ -351,6 +360,8 @@ usb_role_switch_register(struct device *parent, return ERR_PTR(ret); } + sw->registered = true; + /* TODO: Symlinks for the host port and the device controller. */ return sw; @@ -365,8 +376,10 @@ EXPORT_SYMBOL_GPL(usb_role_switch_register); */ void usb_role_switch_unregister(struct usb_role_switch *sw) { - if (!IS_ERR_OR_NULL(sw)) + if (!IS_ERR_OR_NULL(sw)) { + sw->registered = false; device_unregister(&sw->dev); + } } EXPORT_SYMBOL_GPL(usb_role_switch_unregister); diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index f1d7a5a863aa..aa30288c8a8e 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -56,6 +56,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */ { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0489, 0xE003) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ + { USB_DEVICE(0x04BF, 0x1301) }, /* TDK Corporation NC0110013M - Network Controller */ + { USB_DEVICE(0x04BF, 0x1303) }, /* TDK Corporation MM0110113M - i3 Micro Module */ { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */ { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ @@ -144,8 +146,10 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ + { USB_DEVICE(0x10C4, 0x863C) }, /* MGP Instruments PDS100 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ + { USB_DEVICE(0x10C4, 0x87ED) }, /* IMST USB-Stick for Smart Meter */ { USB_DEVICE(0x10C4, 0x8856) }, /* CEL EM357 ZigBee USB Stick - LR */ { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ @@ -176,6 +180,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */ { USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */ { USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */ + { USB_DEVICE(0x11CA, 0x0212) }, /* Verifone USB to Printer (UART, CP2102) */ { USB_DEVICE(0x12B8, 0xEC60) }, /* Link G4 ECU */ { USB_DEVICE(0x12B8, 0xEC62) }, /* Link G4+ ECU */ { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */ diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index fe2173e37b06..248cbc9c48fd 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1077,6 +1077,8 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_FALCONIA_JTAG_UNBUF_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + /* GMC devices */ + { USB_DEVICE(GMC_VID, GMC_Z216C_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 21a2b5a25fc0..5ee60ba2a73c 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1606,3 +1606,9 @@ #define UBLOX_VID 0x1546 #define UBLOX_C099F9P_ZED_PID 0x0502 #define UBLOX_C099F9P_ODIN_PID 0x0503 + +/* + * GMC devices + */ +#define GMC_VID 0x1cd7 +#define GMC_Z216C_PID 0x0217 /* GMC Z216C Adapter IR-USB */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 4adef9259870..1a3e5a9414f0 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -613,6 +613,11 @@ static void option_instat_callback(struct urb *urb); /* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */ #define LUAT_PRODUCT_AIR720U 0x4e00 +/* MeiG Smart Technology products */ +#define MEIGSMART_VENDOR_ID 0x2dee +/* MeiG Smart SLM320 based on UNISOC UIS8910 */ +#define MEIGSMART_PRODUCT_SLM320 0x4d41 + /* Device flags */ /* Highest interface number which can be used with NCTRL() and RSVD() */ @@ -2269,6 +2274,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ @@ -2281,6 +2287,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM320, 0xff, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index b1e844bf31f8..703a9c563557 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -184,6 +184,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81d0)}, /* Dell Wireless 5819 */ {DEVICE_SWI(0x413c, 0x81d1)}, /* Dell Wireless 5818 */ {DEVICE_SWI(0x413c, 0x81d2)}, /* Dell Wireless 5818 */ + {DEVICE_SWI(0x413c, 0x8217)}, /* Dell Wireless DW5826e */ + {DEVICE_SWI(0x413c, 0x8218)}, /* Dell Wireless DW5826e QDL */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c index 4e0eef1440b7..300aeef160e7 100644 --- a/drivers/usb/storage/isd200.c +++ b/drivers/usb/storage/isd200.c @@ -1105,7 +1105,7 @@ static void isd200_dump_driveid(struct us_data *us, u16 *id) static int isd200_get_inquiry_data( struct us_data *us ) { struct isd200_info *info = (struct isd200_info *)us->extra; - int retStatus = ISD200_GOOD; + int retStatus; u16 *id = info->id; usb_stor_dbg(us, "Entering isd200_get_inquiry_data\n"); @@ -1137,6 +1137,13 @@ static int isd200_get_inquiry_data( struct us_data *us ) isd200_fix_driveid(id); isd200_dump_driveid(us, id); + /* Prevent division by 0 in isd200_scsi_to_ata() */ + if (id[ATA_ID_HEADS] == 0 || id[ATA_ID_SECTORS] == 0) { + usb_stor_dbg(us, " Invalid ATA Identify data\n"); + retStatus = ISD200_ERROR; + goto Done; + } + memset(&info->InquiryData, 0, sizeof(info->InquiryData)); /* Standard IDE interface only supports disks */ @@ -1202,6 +1209,7 @@ static int isd200_get_inquiry_data( struct us_data *us ) } } + Done: usb_stor_dbg(us, "Leaving isd200_get_inquiry_data %08X\n", retStatus); return(retStatus); @@ -1481,22 +1489,27 @@ static int isd200_init_info(struct us_data *us) static int isd200_Initialization(struct us_data *us) { + int rc = 0; + usb_stor_dbg(us, "ISD200 Initialization...\n"); /* Initialize ISD200 info struct */ - if (isd200_init_info(us) == ISD200_ERROR) { + if (isd200_init_info(us) < 0) { usb_stor_dbg(us, "ERROR Initializing ISD200 Info struct\n"); + rc = -ENOMEM; } else { /* Get device specific data */ - if (isd200_get_inquiry_data(us) != ISD200_GOOD) + if (isd200_get_inquiry_data(us) != ISD200_GOOD) { usb_stor_dbg(us, "ISD200 Initialization Failure\n"); - else + rc = -EINVAL; + } else { usb_stor_dbg(us, "ISD200 Initialization complete\n"); + } } - return 0; + return rc; } diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 2384026ec4d7..9004f1f685a8 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -183,6 +183,13 @@ static int slave_configure(struct scsi_device *sdev) */ sdev->use_192_bytes_for_3f = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index de3836412bf3..af619efe8eab 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp, * daft to me. */ -static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) +static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) { struct uas_dev_info *devinfo = cmnd->device->hostdata; struct urb *urb; @@ -541,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) urb = uas_alloc_sense_urb(devinfo, gfp, cmnd); if (!urb) - return NULL; + return -ENOMEM; usb_anchor_urb(urb, &devinfo->sense_urbs); err = usb_submit_urb(urb, gfp); if (err) { usb_unanchor_urb(urb); uas_log_cmd_state(cmnd, "sense submit err", err); usb_free_urb(urb); - return NULL; } - return urb; + return err; } static int uas_submit_urbs(struct scsi_cmnd *cmnd, struct uas_dev_info *devinfo) { struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); - struct urb *urb; int err; lockdep_assert_held(&devinfo->lock); if (cmdinfo->state & SUBMIT_STATUS_URB) { - urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC); - if (!urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + err = uas_submit_sense_urb(cmnd, GFP_ATOMIC); + if (err) + return err; cmdinfo->state &= ~SUBMIT_STATUS_URB; } @@ -572,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC, cmnd, DMA_FROM_DEVICE); if (!cmdinfo->data_in_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_DATA_IN_URB; } @@ -582,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->data_in_urb); uas_log_cmd_state(cmnd, "data in submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->state &= ~SUBMIT_DATA_IN_URB; cmdinfo->state |= DATA_IN_URB_INFLIGHT; @@ -592,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC, cmnd, DMA_TO_DEVICE); if (!cmdinfo->data_out_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_DATA_OUT_URB; } @@ -602,7 +600,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->data_out_urb); uas_log_cmd_state(cmnd, "data out submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->state &= ~SUBMIT_DATA_OUT_URB; cmdinfo->state |= DATA_OUT_URB_INFLIGHT; @@ -611,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (cmdinfo->state & ALLOC_CMD_URB) { cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd); if (!cmdinfo->cmd_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_CMD_URB; } @@ -621,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->cmd_urb); uas_log_cmd_state(cmnd, "cmd submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->cmd_urb = NULL; cmdinfo->state &= ~SUBMIT_CMD_URB; @@ -698,7 +696,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd) * of queueing, no matter how fatal the error */ if (err == -ENODEV) { - set_host_byte(cmnd, DID_ERROR); + set_host_byte(cmnd, DID_NO_CONNECT); scsi_done(cmnd); goto zombie; } @@ -878,6 +876,13 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) sdev->guess_capacity = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 3da404d5178d..ce83f558fe44 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1245,6 +1245,7 @@ static ssize_t select_usb_power_delivery_store(struct device *dev, { struct typec_port *port = to_typec_port(dev); struct usb_power_delivery *pd; + int ret; if (!port->ops || !port->ops->pd_set) return -EOPNOTSUPP; @@ -1253,7 +1254,11 @@ static ssize_t select_usb_power_delivery_store(struct device *dev, if (!pd) return -EINVAL; - return port->ops->pd_set(port, pd); + ret = port->ops->pd_set(port, pd); + if (ret) + return ret; + + return size; } static ssize_t select_usb_power_delivery_show(struct device *dev, diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index dc2dea3768fb..98f335cbbcde 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -138,8 +138,12 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd) if (!(cci & UCSI_CCI_COMMAND_COMPLETE)) return -EIO; - if (cci & UCSI_CCI_NOT_SUPPORTED) + if (cci & UCSI_CCI_NOT_SUPPORTED) { + if (ucsi_acknowledge_command(ucsi) < 0) + dev_err(ucsi->dev, + "ACK of unsupported command failed\n"); return -EOPNOTSUPP; + } if (cci & UCSI_CCI_ERROR) { if (cmd == UCSI_GET_ERROR_STATUS) @@ -829,9 +833,11 @@ static void ucsi_handle_connector_change(struct work_struct *work) if (con->status.change & UCSI_CONSTAT_CAM_CHANGE) ucsi_partner_task(con, ucsi_check_altmodes, 1, 0); + mutex_lock(&ucsi->ppm_lock); clear_bit(EVENT_PENDING, &con->ucsi->flags); - ret = ucsi_acknowledge_connector_change(ucsi); + mutex_unlock(&ucsi->ppm_lock); + if (ret) dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); @@ -872,13 +878,47 @@ static int ucsi_reset_connector(struct ucsi_connector *con, bool hard) static int ucsi_reset_ppm(struct ucsi *ucsi) { - u64 command = UCSI_PPM_RESET; + u64 command; unsigned long tmo; u32 cci; int ret; mutex_lock(&ucsi->ppm_lock); + ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci)); + if (ret < 0) + goto out; + + /* + * If UCSI_CCI_RESET_COMPLETE is already set we must clear + * the flag before we start another reset. Send a + * UCSI_SET_NOTIFICATION_ENABLE command to achieve this. + * Ignore a timeout and try the reset anyway if this fails. + */ + if (cci & UCSI_CCI_RESET_COMPLETE) { + command = UCSI_SET_NOTIFICATION_ENABLE; + ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command, + sizeof(command)); + if (ret < 0) + goto out; + + tmo = jiffies + msecs_to_jiffies(UCSI_TIMEOUT_MS); + do { + ret = ucsi->ops->read(ucsi, UCSI_CCI, + &cci, sizeof(cci)); + if (ret < 0) + goto out; + if (cci & UCSI_CCI_COMMAND_COMPLETE) + break; + if (time_is_before_jiffies(tmo)) + break; + msleep(20); + } while (1); + + WARN_ON(cci & UCSI_CCI_RESET_COMPLETE); + } + + command = UCSI_PPM_RESET; ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command, sizeof(command)); if (ret < 0) diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 60ce9fb6e745..dbb10cb310d4 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -220,12 +220,12 @@ struct ucsi_cable_property { #define UCSI_CABLE_PROP_FLAG_VBUS_IN_CABLE BIT(0) #define UCSI_CABLE_PROP_FLAG_ACTIVE_CABLE BIT(1) #define UCSI_CABLE_PROP_FLAG_DIRECTIONALITY BIT(2) -#define UCSI_CABLE_PROP_FLAG_PLUG_TYPE(_f_) ((_f_) & GENMASK(3, 0)) +#define UCSI_CABLE_PROP_FLAG_PLUG_TYPE(_f_) (((_f_) & GENMASK(4, 3)) >> 3) #define UCSI_CABLE_PROPERTY_PLUG_TYPE_A 0 #define UCSI_CABLE_PROPERTY_PLUG_TYPE_B 1 #define UCSI_CABLE_PROPERTY_PLUG_TYPE_C 2 #define UCSI_CABLE_PROPERTY_PLUG_OTHER 3 -#define UCSI_CABLE_PROP_MODE_SUPPORT BIT(5) +#define UCSI_CABLE_PROP_FLAG_MODE_SUPPORT BIT(5) u8 latency; } __packed; diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 217355f1f9b9..b4d86d47c5db 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -23,6 +23,9 @@ struct ucsi_acpi { void *base; struct completion complete; unsigned long flags; +#define UCSI_ACPI_SUPPRESS_EVENT 0 +#define UCSI_ACPI_COMMAND_PENDING 1 +#define UCSI_ACPI_ACK_PENDING 2 guid_t guid; u64 cmd; }; @@ -73,9 +76,13 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, const void *val, size_t val_len) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + bool ack = UCSI_COMMAND(*(u64 *)val) == UCSI_ACK_CC_CI; int ret; - set_bit(COMMAND_PENDING, &ua->flags); + if (ack) + set_bit(UCSI_ACPI_ACK_PENDING, &ua->flags); + else + set_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags); ret = ucsi_acpi_async_write(ucsi, offset, val, val_len); if (ret) @@ -85,7 +92,10 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, ret = -ETIMEDOUT; out_clear_bit: - clear_bit(COMMAND_PENDING, &ua->flags); + if (ack) + clear_bit(UCSI_ACPI_ACK_PENDING, &ua->flags); + else + clear_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags); return ret; } @@ -119,12 +129,62 @@ static const struct ucsi_operations ucsi_zenbook_ops = { .async_write = ucsi_acpi_async_write }; -static const struct dmi_system_id zenbook_dmi_id[] = { +/* + * Some Dell laptops don't like ACK commands with the + * UCSI_ACK_CONNECTOR_CHANGE but not the UCSI_ACK_COMMAND_COMPLETE + * bit set. To work around this send a dummy command and bundle the + * UCSI_ACK_CONNECTOR_CHANGE with the UCSI_ACK_COMMAND_COMPLETE + * for the dummy command. + */ +static int +ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset, + const void *val, size_t val_len) +{ + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + u64 cmd = *(u64 *)val; + u64 dummycmd = UCSI_GET_CAPABILITY; + int ret; + + if (cmd == (UCSI_ACK_CC_CI | UCSI_ACK_CONNECTOR_CHANGE)) { + cmd |= UCSI_ACK_COMMAND_COMPLETE; + + /* + * The UCSI core thinks it is sending a connector change ack + * and will accept new connector change events. We don't want + * this to happen for the dummy command as its response will + * still report the very event that the core is trying to clear. + */ + set_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags); + ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &dummycmd, + sizeof(dummycmd)); + clear_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags); + + if (ret < 0) + return ret; + } + + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, sizeof(cmd)); +} + +static const struct ucsi_operations ucsi_dell_ops = { + .read = ucsi_acpi_read, + .sync_write = ucsi_dell_sync_write, + .async_write = ucsi_acpi_async_write +}; + +static const struct dmi_system_id ucsi_acpi_quirks[] = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), }, + .driver_data = (void *)&ucsi_zenbook_ops, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + }, + .driver_data = (void *)&ucsi_dell_ops, }, { } }; @@ -139,11 +199,14 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data) if (ret) return; - if (UCSI_CCI_CONNECTOR(cci)) + if (UCSI_CCI_CONNECTOR(cci) && + !test_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags)) ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci)); - if (test_bit(COMMAND_PENDING, &ua->flags) && - cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE)) + if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags)) + complete(&ua->complete); + if (cci & UCSI_CCI_COMMAND_COMPLETE && + test_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags)) complete(&ua->complete); } @@ -151,6 +214,7 @@ static int ucsi_acpi_probe(struct platform_device *pdev) { struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); const struct ucsi_operations *ops = &ucsi_acpi_ops; + const struct dmi_system_id *id; struct ucsi_acpi *ua; struct resource *res; acpi_status status; @@ -180,8 +244,9 @@ static int ucsi_acpi_probe(struct platform_device *pdev) init_completion(&ua->complete); ua->dev = &pdev->dev; - if (dmi_check_system(zenbook_dmi_id)) - ops = &ucsi_zenbook_ops; + id = dmi_first_match(ucsi_acpi_quirks); + if (id) + ops = id->driver_data; ua->ucsi = ucsi_create(&pdev->dev, ops); if (IS_ERR(ua->ucsi)) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 2b7e796c4889..74d295312466 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -185,8 +185,6 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev); static bool mlx5_vdpa_debug; -#define MLX5_CVQ_MAX_ENT 16 - #define MLX5_LOG_VIO_FLAG(_feature) \ do { \ if (features & BIT_ULL(_feature)) \ @@ -1980,9 +1978,16 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_virtqueue *mvq; - if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) + if (!is_index_valid(mvdev, idx)) return; + if (is_ctrl_vq_idx(mvdev, idx)) { + struct mlx5_control_vq *cvq = &mvdev->cvq; + + cvq->vring.vring.num = num; + return; + } + mvq = &ndev->vqs[idx]; mvq->num_ent = num; } @@ -2512,7 +2517,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) u16 idx = cvq->vring.last_avail_idx; err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, - MLX5_CVQ_MAX_ENT, false, + cvq->vring.vring.num, false, (struct vring_desc *)(uintptr_t)cvq->desc_addr, (struct vring_avail *)(uintptr_t)cvq->driver_addr, (struct vring_used *)(uintptr_t)cvq->device_addr); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 61bde476cf9c..e7fc25bfdd23 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -120,7 +120,7 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim) for (i = 0; i < vdpasim->dev_attr.nas; i++) vhost_iotlb_reset(&vdpasim->iommu[i]); - vdpasim->running = true; + vdpasim->running = false; spin_unlock(&vdpasim->iommu_lock); vdpasim->features = 0; @@ -513,6 +513,7 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) spin_lock(&vdpasim->lock); vdpasim->status = status; + vdpasim->running = (status & VIRTIO_CONFIG_S_DRIVER_OK) != 0; spin_unlock(&vdpasim->lock); } diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index d74164abbf40..ab9d8e3481f7 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -366,7 +366,7 @@ static int vfio_fops_open(struct inode *inode, struct file *filep) { struct vfio_container *container; - container = kzalloc(sizeof(*container), GFP_KERNEL); + container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT); if (!container) return -ENOMEM; diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c index 7b428eac3d3e..b125b6edf634 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c @@ -142,13 +142,14 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev, irq = &vdev->mc_irqs[index]; if (flags & VFIO_IRQ_SET_DATA_NONE) { - vfio_fsl_mc_irq_handler(hwirq, irq); + if (irq->trigger) + eventfd_signal(irq->trigger, 1); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { u8 trigger = *(u8 *)data; - if (trigger) - vfio_fsl_mc_irq_handler(hwirq, irq); + if (trigger && irq->trigger) + eventfd_signal(irq->trigger, 1); } return 0; diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index 0f19d502f351..dfab5b742191 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -99,7 +99,7 @@ struct iova_bitmap { struct iova_bitmap_map mapped; /* userspace address of the bitmap */ - u64 __user *bitmap; + u8 __user *bitmap; /* u64 index that @mapped points to */ unsigned long mapped_base_index; @@ -161,7 +161,7 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) { struct iova_bitmap_map *mapped = &bitmap->mapped; unsigned long npages; - u64 __user *addr; + u8 __user *addr; long ret; /* @@ -174,18 +174,19 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) bitmap->mapped_base_index) * sizeof(*bitmap->bitmap), PAGE_SIZE); - /* - * We always cap at max number of 'struct page' a base page can fit. - * This is, for example, on x86 means 2M of bitmap data max. - */ - npages = min(npages, PAGE_SIZE / sizeof(struct page *)); - /* * Bitmap address to be pinned is calculated via pointer arithmetic * with bitmap u64 word index. */ addr = bitmap->bitmap + bitmap->mapped_base_index; + /* + * We always cap at max number of 'struct page' a base page can fit. + * This is, for example, on x86 means 2M of bitmap data max. + */ + npages = min(npages + !!offset_in_page(addr), + PAGE_SIZE / sizeof(struct page *)); + ret = pin_user_pages_fast((unsigned long)addr, npages, FOLL_WRITE, mapped->pages); if (ret <= 0) @@ -246,7 +247,7 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, mapped = &bitmap->mapped; mapped->pgshift = __ffs(page_size); - bitmap->bitmap = data; + bitmap->bitmap = (u8 __user *)data; bitmap->mapped_total_index = iova_bitmap_offset_to_index(bitmap, length - 1) + 1; bitmap->iova = iova; @@ -301,7 +302,7 @@ static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap) remaining = bitmap->mapped_total_index - bitmap->mapped_base_index; remaining = min_t(unsigned long, remaining, - bytes / sizeof(*bitmap->bitmap)); + DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap))); return remaining; } @@ -405,6 +406,7 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; + unsigned long last_page_idx = mapped->npages - 1; do { unsigned int page_idx = cur_bit / BITS_PER_PAGE; @@ -413,6 +415,9 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, last_bit - cur_bit + 1); void *kaddr; + if (unlikely(page_idx > last_page_idx)) + break; + kaddr = kmap_local_page(mapped->pages[page_idx]); bitmap_set(kaddr, offset, nbits); kunmap_local(kaddr); diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 4a350421c5f6..523e0144c86f 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1244,7 +1244,7 @@ static int vfio_msi_cap_len(struct vfio_pci_core_device *vdev, u8 pos) if (vdev->msi_perm) return len; - vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL); + vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL_ACCOUNT); if (!vdev->msi_perm) return -ENOMEM; @@ -1731,11 +1731,11 @@ int vfio_config_init(struct vfio_pci_core_device *vdev) * no requirements on the length of a capability, so the gap between * capabilities needs byte granularity. */ - map = kmalloc(pdev->cfg_size, GFP_KERNEL); + map = kmalloc(pdev->cfg_size, GFP_KERNEL_ACCOUNT); if (!map) return -ENOMEM; - vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL); + vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL_ACCOUNT); if (!vconfig) { kfree(map); return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index e030c2120183..f357fd157e1e 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -141,7 +141,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) * of the exclusive page in case that hot-add * device's bar is assigned into it. */ - dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL); + dummy_res = + kzalloc(sizeof(*dummy_res), GFP_KERNEL_ACCOUNT); if (dummy_res == NULL) goto no_mmap; @@ -856,7 +857,7 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, region = krealloc(vdev->region, (vdev->num_regions + 1) * sizeof(*region), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!region) return -ENOMEM; @@ -1637,7 +1638,7 @@ static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev, { struct vfio_pci_mmap_vma *mmap_vma; - mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL); + mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT); if (!mmap_vma) return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index 5e6ca5926954..dd70e2431bd7 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -180,7 +180,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev) if (!addr || !(~addr)) return -ENODEV; - opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL); + opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL_ACCOUNT); if (!opregionvbt) return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 40c3d7cf163f..03246a59b553 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -55,17 +55,24 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused) { struct vfio_pci_core_device *vdev = opaque; - if (likely(is_intx(vdev) && !vdev->virq_disabled)) - eventfd_signal(vdev->ctx[0].trigger, 1); + if (likely(is_intx(vdev) && !vdev->virq_disabled)) { + struct eventfd_ctx *trigger; + + trigger = READ_ONCE(vdev->ctx[0].trigger); + if (likely(trigger)) + eventfd_signal(trigger, 1); + } } /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */ -bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) +static bool __vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; unsigned long flags; bool masked_changed = false; + lockdep_assert_held(&vdev->igate); + spin_lock_irqsave(&vdev->irqlock, flags); /* @@ -95,6 +102,17 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) return masked_changed; } +bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) +{ + bool mask_changed; + + mutex_lock(&vdev->igate); + mask_changed = __vfio_pci_intx_mask(vdev); + mutex_unlock(&vdev->igate); + + return mask_changed; +} + /* * If this is triggered by an eventfd, we can't call eventfd_signal * or else we'll deadlock on the eventfd wait queue. Return >0 when @@ -137,12 +155,21 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) return ret; } -void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) +static void __vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) { + lockdep_assert_held(&vdev->igate); + if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0) vfio_send_intx_eventfd(vdev, NULL); } +void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) +{ + mutex_lock(&vdev->igate); + __vfio_pci_intx_unmask(vdev); + mutex_unlock(&vdev->igate); +} + static irqreturn_t vfio_intx_handler(int irq, void *dev_id) { struct vfio_pci_core_device *vdev = dev_id; @@ -169,95 +196,104 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id) return ret; } -static int vfio_intx_enable(struct vfio_pci_core_device *vdev) +static int vfio_intx_enable(struct vfio_pci_core_device *vdev, + struct eventfd_ctx *trigger) { + struct pci_dev *pdev = vdev->pdev; + unsigned long irqflags; + char *name; + int ret; + if (!is_irq_none(vdev)) return -EINVAL; - if (!vdev->pdev->irq) + if (!pdev->irq) return -ENODEV; - vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); + name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev)); + if (!name) + return -ENOMEM; + + vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT); if (!vdev->ctx) return -ENOMEM; vdev->num_ctx = 1; - /* - * If the virtual interrupt is masked, restore it. Devices - * supporting DisINTx can be masked at the hardware level - * here, non-PCI-2.3 devices will have to wait until the - * interrupt is enabled. - */ - vdev->ctx[0].masked = vdev->virq_disabled; - if (vdev->pci_2_3) - pci_intx(vdev->pdev, !vdev->ctx[0].masked); - - vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; - - return 0; -} - -static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) -{ - struct pci_dev *pdev = vdev->pdev; - unsigned long irqflags = IRQF_SHARED; - struct eventfd_ctx *trigger; - unsigned long flags; - int ret; - - if (vdev->ctx[0].trigger) { - free_irq(pdev->irq, vdev); - kfree(vdev->ctx[0].name); - eventfd_ctx_put(vdev->ctx[0].trigger); - vdev->ctx[0].trigger = NULL; - } - - if (fd < 0) /* Disable only */ - return 0; - - vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)", - pci_name(pdev)); - if (!vdev->ctx[0].name) - return -ENOMEM; - - trigger = eventfd_ctx_fdget(fd); - if (IS_ERR(trigger)) { - kfree(vdev->ctx[0].name); - return PTR_ERR(trigger); - } - + vdev->ctx[0].name = name; vdev->ctx[0].trigger = trigger; - if (!vdev->pci_2_3) - irqflags = 0; + /* + * Fill the initial masked state based on virq_disabled. After + * enable, changing the DisINTx bit in vconfig directly changes INTx + * masking. igate prevents races during setup, once running masked + * is protected via irqlock. + * + * Devices supporting DisINTx also reflect the current mask state in + * the physical DisINTx bit, which is not affected during IRQ setup. + * + * Devices without DisINTx support require an exclusive interrupt. + * IRQ masking is performed at the IRQ chip. Again, igate protects + * against races during setup and IRQ handlers and irqfds are not + * yet active, therefore masked is stable and can be used to + * conditionally auto-enable the IRQ. + * + * irq_type must be stable while the IRQ handler is registered, + * therefore it must be set before request_irq(). + */ + vdev->ctx[0].masked = vdev->virq_disabled; + if (vdev->pci_2_3) { + pci_intx(pdev, !vdev->ctx[0].masked); + irqflags = IRQF_SHARED; + } else { + irqflags = vdev->ctx[0].masked ? IRQF_NO_AUTOEN : 0; + } + + vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; ret = request_irq(pdev->irq, vfio_intx_handler, irqflags, vdev->ctx[0].name, vdev); if (ret) { - vdev->ctx[0].trigger = NULL; - kfree(vdev->ctx[0].name); - eventfd_ctx_put(trigger); + vdev->irq_type = VFIO_PCI_NUM_IRQS; + kfree(name); + vdev->num_ctx = 0; + kfree(vdev->ctx); return ret; } - /* - * INTx disable will stick across the new irq setup, - * disable_irq won't. - */ - spin_lock_irqsave(&vdev->irqlock, flags); - if (!vdev->pci_2_3 && vdev->ctx[0].masked) - disable_irq_nosync(pdev->irq); - spin_unlock_irqrestore(&vdev->irqlock, flags); + return 0; +} + +static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, + struct eventfd_ctx *trigger) +{ + struct pci_dev *pdev = vdev->pdev; + struct eventfd_ctx *old; + + old = vdev->ctx[0].trigger; + + WRITE_ONCE(vdev->ctx[0].trigger, trigger); + + /* Releasing an old ctx requires synchronizing in-flight users */ + if (old) { + synchronize_irq(pdev->irq); + vfio_virqfd_flush_thread(&vdev->ctx[0].unmask); + eventfd_ctx_put(old); + } return 0; } static void vfio_intx_disable(struct vfio_pci_core_device *vdev) { + struct pci_dev *pdev = vdev->pdev; + vfio_virqfd_disable(&vdev->ctx[0].unmask); vfio_virqfd_disable(&vdev->ctx[0].mask); - vfio_intx_set_signal(vdev, -1); + free_irq(pdev->irq, vdev); + if (vdev->ctx[0].trigger) + eventfd_ctx_put(vdev->ctx[0].trigger); + kfree(vdev->ctx[0].name); vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->num_ctx = 0; kfree(vdev->ctx); @@ -284,7 +320,8 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi if (!is_irq_none(vdev)) return -EINVAL; - vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); + vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), + GFP_KERNEL_ACCOUNT); if (!vdev->ctx) return -ENOMEM; @@ -316,14 +353,14 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi } static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, - int vector, int fd, bool msix) + unsigned int vector, int fd, bool msix) { struct pci_dev *pdev = vdev->pdev; struct eventfd_ctx *trigger; int irq, ret; u16 cmd; - if (vector < 0 || vector >= vdev->num_ctx) + if (vector >= vdev->num_ctx) return -EINVAL; irq = pci_irq_vector(pdev, vector); @@ -343,7 +380,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, if (fd < 0) return 0; - vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)", + vdev->ctx[vector].name = kasprintf(GFP_KERNEL_ACCOUNT, + "vfio-msi%s[%d](%s)", msix ? "x" : "", vector, pci_name(pdev)); if (!vdev->ctx[vector].name) @@ -397,7 +435,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start, unsigned count, int32_t *fds, bool msix) { - int i, j, ret = 0; + unsigned int i, j; + int ret = 0; if (start >= vdev->num_ctx || start + count > vdev->num_ctx) return -EINVAL; @@ -408,8 +447,8 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start, } if (ret) { - for (--j; j >= (int)start; j--) - vfio_msi_set_vector_signal(vdev, j, -1, msix); + for (i = start; i < j; i++) + vfio_msi_set_vector_signal(vdev, i, -1, msix); } return ret; @@ -418,16 +457,15 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start, static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix) { struct pci_dev *pdev = vdev->pdev; - int i; + unsigned int i; u16 cmd; for (i = 0; i < vdev->num_ctx; i++) { vfio_virqfd_disable(&vdev->ctx[i].unmask); vfio_virqfd_disable(&vdev->ctx[i].mask); + vfio_msi_set_vector_signal(vdev, i, -1, msix); } - vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); - cmd = vfio_pci_memory_lock_and_enable(vdev); pci_free_irq_vectors(pdev); vfio_pci_memory_unlock_and_restore(vdev, cmd); @@ -455,11 +493,11 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev, return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_NONE) { - vfio_pci_intx_unmask(vdev); + __vfio_pci_intx_unmask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { uint8_t unmask = *(uint8_t *)data; if (unmask) - vfio_pci_intx_unmask(vdev); + __vfio_pci_intx_unmask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; if (fd >= 0) @@ -482,11 +520,11 @@ static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev, return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_NONE) { - vfio_pci_intx_mask(vdev); + __vfio_pci_intx_mask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { uint8_t mask = *(uint8_t *)data; if (mask) - vfio_pci_intx_mask(vdev); + __vfio_pci_intx_mask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { return -ENOTTY; /* XXX implement me */ } @@ -507,19 +545,23 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev, return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + struct eventfd_ctx *trigger = NULL; int32_t fd = *(int32_t *)data; int ret; + if (fd >= 0) { + trigger = eventfd_ctx_fdget(fd); + if (IS_ERR(trigger)) + return PTR_ERR(trigger); + } + if (is_intx(vdev)) - return vfio_intx_set_signal(vdev, fd); + ret = vfio_intx_set_signal(vdev, trigger); + else + ret = vfio_intx_enable(vdev, trigger); - ret = vfio_intx_enable(vdev); - if (ret) - return ret; - - ret = vfio_intx_set_signal(vdev, fd); - if (ret) - vfio_intx_disable(vdev); + if (ret && trigger) + eventfd_ctx_put(trigger); return ret; } @@ -541,7 +583,7 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { - int i; + unsigned int i; bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false; if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) { diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index e352a033b4ae..e27de61ac9fe 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -470,7 +470,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, goto out_unlock; } - ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL); + ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT); if (!ioeventfd) { ret = -ENOMEM; goto out_unlock; diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index c5b09ec0a3c9..7f4341a8d718 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -136,6 +136,16 @@ static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, return 0; } +/* + * The trigger eventfd is guaranteed valid in the interrupt path + * and protected by the igate mutex when triggered via ioctl. + */ +static void vfio_send_eventfd(struct vfio_platform_irq *irq_ctx) +{ + if (likely(irq_ctx->trigger)) + eventfd_signal(irq_ctx->trigger, 1); +} + static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) { struct vfio_platform_irq *irq_ctx = dev_id; @@ -155,7 +165,7 @@ static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) spin_unlock_irqrestore(&irq_ctx->lock, flags); if (ret == IRQ_HANDLED) - eventfd_signal(irq_ctx->trigger, 1); + vfio_send_eventfd(irq_ctx); return ret; } @@ -164,22 +174,19 @@ static irqreturn_t vfio_irq_handler(int irq, void *dev_id) { struct vfio_platform_irq *irq_ctx = dev_id; - eventfd_signal(irq_ctx->trigger, 1); + vfio_send_eventfd(irq_ctx); return IRQ_HANDLED; } static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, - int fd, irq_handler_t handler) + int fd) { struct vfio_platform_irq *irq = &vdev->irqs[index]; struct eventfd_ctx *trigger; - int ret; if (irq->trigger) { - irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN); - free_irq(irq->hwirq, irq); - kfree(irq->name); + disable_irq(irq->hwirq); eventfd_ctx_put(irq->trigger); irq->trigger = NULL; } @@ -187,30 +194,20 @@ static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, if (fd < 0) /* Disable only */ return 0; - irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)", - irq->hwirq, vdev->name); - if (!irq->name) - return -ENOMEM; - trigger = eventfd_ctx_fdget(fd); - if (IS_ERR(trigger)) { - kfree(irq->name); + if (IS_ERR(trigger)) return PTR_ERR(trigger); - } irq->trigger = trigger; - irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN); - ret = request_irq(irq->hwirq, handler, 0, irq->name, irq); - if (ret) { - kfree(irq->name); - eventfd_ctx_put(trigger); - irq->trigger = NULL; - return ret; - } - - if (!irq->masked) - enable_irq(irq->hwirq); + /* + * irq->masked effectively provides nested disables within the overall + * enable relative to trigger. Specifically request_irq() is called + * with NO_AUTOEN, therefore the IRQ is initially disabled. The user + * may only further disable the IRQ with a MASK operations because + * irq->masked is initially false. + */ + enable_irq(irq->hwirq); return 0; } @@ -229,7 +226,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, handler = vfio_irq_handler; if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) - return vfio_set_trigger(vdev, index, -1, handler); + return vfio_set_trigger(vdev, index, -1); if (start != 0 || count != 1) return -EINVAL; @@ -237,7 +234,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; - return vfio_set_trigger(vdev, index, fd, handler); + return vfio_set_trigger(vdev, index, fd); } if (flags & VFIO_IRQ_SET_DATA_NONE) { @@ -261,6 +258,14 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, unsigned start, unsigned count, uint32_t flags, void *data) = NULL; + /* + * For compatibility, errors from request_irq() are local to the + * SET_IRQS path and reflected in the name pointer. This allows, + * for example, polling mode fallback for an exclusive IRQ failure. + */ + if (IS_ERR(vdev->irqs[index].name)) + return PTR_ERR(vdev->irqs[index].name); + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { case VFIO_IRQ_SET_ACTION_MASK: func = vfio_platform_set_irq_mask; @@ -281,7 +286,7 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, int vfio_platform_irq_init(struct vfio_platform_device *vdev) { - int cnt = 0, i; + int cnt = 0, i, ret = 0; while (vdev->get_irq(vdev, cnt) >= 0) cnt++; @@ -292,37 +297,70 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev) for (i = 0; i < cnt; i++) { int hwirq = vdev->get_irq(vdev, i); + irq_handler_t handler = vfio_irq_handler; - if (hwirq < 0) + if (hwirq < 0) { + ret = -EINVAL; goto err; + } spin_lock_init(&vdev->irqs[i].lock); vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD; - if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) + if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) { vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE | VFIO_IRQ_INFO_AUTOMASKED; + handler = vfio_automasked_irq_handler; + } vdev->irqs[i].count = 1; vdev->irqs[i].hwirq = hwirq; vdev->irqs[i].masked = false; + vdev->irqs[i].name = kasprintf(GFP_KERNEL, + "vfio-irq[%d](%s)", hwirq, + vdev->name); + if (!vdev->irqs[i].name) { + ret = -ENOMEM; + goto err; + } + + ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN, + vdev->irqs[i].name, &vdev->irqs[i]); + if (ret) { + kfree(vdev->irqs[i].name); + vdev->irqs[i].name = ERR_PTR(ret); + } } vdev->num_irqs = cnt; return 0; err: + for (--i; i >= 0; i--) { + if (!IS_ERR(vdev->irqs[i].name)) { + free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); + kfree(vdev->irqs[i].name); + } + } kfree(vdev->irqs); - return -EINVAL; + return ret; } void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) { int i; - for (i = 0; i < vdev->num_irqs; i++) - vfio_set_trigger(vdev, i, -1, NULL); + for (i = 0; i < vdev->num_irqs; i++) { + vfio_virqfd_disable(&vdev->irqs[i].mask); + vfio_virqfd_disable(&vdev->irqs[i].unmask); + if (!IS_ERR(vdev->irqs[i].name)) { + free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); + if (vdev->irqs[i].trigger) + eventfd_ctx_put(vdev->irqs[i].trigger); + kfree(vdev->irqs[i].name); + } + } vdev->num_irqs = 0; kfree(vdev->irqs); diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 414e98d82b02..e06b32ddedce 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -104,6 +104,13 @@ static void virqfd_inject(struct work_struct *work) virqfd->thread(virqfd->opaque, virqfd->data); } +static void virqfd_flush_inject(struct work_struct *work) +{ + struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject); + + flush_work(&virqfd->inject); +} + int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), void (*thread)(void *, void *), @@ -115,7 +122,7 @@ int vfio_virqfd_enable(void *opaque, int ret = 0; __poll_t events; - virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); + virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL_ACCOUNT); if (!virqfd) return -ENOMEM; @@ -127,6 +134,7 @@ int vfio_virqfd_enable(void *opaque, INIT_WORK(&virqfd->shutdown, virqfd_shutdown); INIT_WORK(&virqfd->inject, virqfd_inject); + INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject); irqfd = fdget(fd); if (!irqfd.file) { @@ -217,6 +225,19 @@ void vfio_virqfd_disable(struct virqfd **pvirqfd) } EXPORT_SYMBOL_GPL(vfio_virqfd_disable); +void vfio_virqfd_flush_thread(struct virqfd **pvirqfd) +{ + unsigned long flags; + + spin_lock_irqsave(&virqfd_lock, flags); + if (*pvirqfd && (*pvirqfd)->thread) + queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject); + spin_unlock_irqrestore(&virqfd_lock, flags); + + flush_workqueue(vfio_irqfd_cleanup_wq); +} +EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread); + module_init(vfio_virqfd_init); module_exit(vfio_virqfd_exit); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 2eea08029881..61c72e62abd4 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2588,12 +2588,11 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify); /* Create a new message. */ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) { - struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL); + /* Make sure all padding within the structure is initialized. */ + struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; - /* Make sure all padding within the structure is initialized. */ - memset(&node->msg, 0, sizeof node->msg); node->vq = vq; node->msg.type = type; return node; diff --git a/drivers/video/backlight/da9052_bl.c b/drivers/video/backlight/da9052_bl.c index 882359dd288c..aa00379392a0 100644 --- a/drivers/video/backlight/da9052_bl.c +++ b/drivers/video/backlight/da9052_bl.c @@ -117,6 +117,7 @@ static int da9052_backlight_probe(struct platform_device *pdev) wleds->led_reg = platform_get_device_id(pdev)->driver_data; wleds->state = DA9052_WLEDS_OFF; + memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; props.max_brightness = DA9052_MAX_BRIGHTNESS; diff --git a/drivers/video/backlight/lm3630a_bl.c b/drivers/video/backlight/lm3630a_bl.c index 475f35635bf6..0d43f6326750 100644 --- a/drivers/video/backlight/lm3630a_bl.c +++ b/drivers/video/backlight/lm3630a_bl.c @@ -231,7 +231,7 @@ static int lm3630a_bank_a_get_brightness(struct backlight_device *bl) if (rval < 0) goto out_i2c_err; brightness |= rval; - goto out; + return brightness; } /* disable sleep */ @@ -242,11 +242,8 @@ static int lm3630a_bank_a_get_brightness(struct backlight_device *bl) rval = lm3630a_read(pchip, REG_BRT_A); if (rval < 0) goto out_i2c_err; - brightness = rval; + return rval; -out: - bl->props.brightness = brightness; - return bl->props.brightness; out_i2c_err: dev_err(pchip->dev, "i2c failed to access register\n"); return 0; @@ -306,7 +303,7 @@ static int lm3630a_bank_b_get_brightness(struct backlight_device *bl) if (rval < 0) goto out_i2c_err; brightness |= rval; - goto out; + return brightness; } /* disable sleep */ @@ -317,11 +314,8 @@ static int lm3630a_bank_b_get_brightness(struct backlight_device *bl) rval = lm3630a_read(pchip, REG_BRT_B); if (rval < 0) goto out_i2c_err; - brightness = rval; + return rval; -out: - bl->props.brightness = brightness; - return bl->props.brightness; out_i2c_err: dev_err(pchip->dev, "i2c failed to access register\n"); return 0; @@ -339,6 +333,7 @@ static int lm3630a_backlight_register(struct lm3630a_chip *pchip) struct backlight_properties props; const char *label; + memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; if (pdata->leda_ctrl != LM3630A_LEDA_DISABLE) { props.brightness = pdata->leda_init_brt; diff --git a/drivers/video/backlight/lm3639_bl.c b/drivers/video/backlight/lm3639_bl.c index 6580911671a3..4c9726a7fa72 100644 --- a/drivers/video/backlight/lm3639_bl.c +++ b/drivers/video/backlight/lm3639_bl.c @@ -339,6 +339,7 @@ static int lm3639_probe(struct i2c_client *client, } /* backlight */ + memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; props.brightness = pdata->init_brt_led; props.max_brightness = pdata->max_brt_led; diff --git a/drivers/video/backlight/lp8788_bl.c b/drivers/video/backlight/lp8788_bl.c index ba42f3fe0c73..d9b95dbd40d3 100644 --- a/drivers/video/backlight/lp8788_bl.c +++ b/drivers/video/backlight/lp8788_bl.c @@ -191,6 +191,7 @@ static int lp8788_backlight_register(struct lp8788_bl *bl) int init_brt; char *name; + memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_PLATFORM; props.max_brightness = MAX_BRIGHTNESS; diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index fa205be94a4b..14498a0d13e0 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2397,11 +2397,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, struct fbcon_ops *ops = info->fbcon_par; struct fbcon_display *p = &fb_display[vc->vc_num]; int resize, ret, old_userfont, old_width, old_height, old_charcount; - char *old_data = NULL; + u8 *old_data = vc->vc_font.data; resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); - if (p->userfont) - old_data = vc->vc_font.data; vc->vc_font.data = (void *)(p->fontdata = data); old_userfont = p->userfont; if ((p->userfont = userfont)) @@ -2435,13 +2433,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, update_screen(vc); } - if (old_data && (--REFCOUNT(old_data) == 0)) + if (old_userfont && (--REFCOUNT(old_data) == 0)) kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); return 0; err_out: p->fontdata = old_data; - vc->vc_font.data = (void *)old_data; + vc->vc_font.data = old_data; if (userfont) { p->userfont = old_userfont; diff --git a/drivers/video/fbdev/savage/savagefb_driver.c b/drivers/video/fbdev/savage/savagefb_driver.c index b7818b652698..a7b63c475f95 100644 --- a/drivers/video/fbdev/savage/savagefb_driver.c +++ b/drivers/video/fbdev/savage/savagefb_driver.c @@ -869,6 +869,9 @@ static int savagefb_check_var(struct fb_var_screeninfo *var, DBG("savagefb_check_var"); + if (!var->pixclock) + return -EINVAL; + var->transp.offset = 0; var->transp.length = 0; switch (var->bits_per_pixel) { diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index 1c197c3f9538..fe8996461b9e 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -1475,6 +1475,8 @@ sisfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) vtotal = var->upper_margin + var->lower_margin + var->vsync_len; + if (!var->pixclock) + return -EINVAL; pixclock = var->pixclock; if((var->vmode & FB_VMODE_MASK) == FB_VMODE_NONINTERLACED) { diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c index bb1122909396..843f9f8e3917 100644 --- a/drivers/watchdog/it87_wdt.c +++ b/drivers/watchdog/it87_wdt.c @@ -255,6 +255,7 @@ static struct watchdog_device wdt_dev = { static int __init it87_wdt_init(void) { u8 chip_rev; + u8 ctrl; int rc; rc = superio_enter(); @@ -313,7 +314,18 @@ static int __init it87_wdt_init(void) superio_select(GPIO); superio_outb(WDT_TOV1, WDTCFG); - superio_outb(0x00, WDTCTRL); + + switch (chip_type) { + case IT8784_ID: + case IT8786_ID: + ctrl = superio_inb(WDTCTRL); + ctrl &= 0x08; + superio_outb(ctrl, WDTCTRL); + break; + default: + superio_outb(0x00, WDTCTRL); + } + superio_exit(); if (timeout < 1 || timeout > max_units * 60) { diff --git a/drivers/watchdog/stm32_iwdg.c b/drivers/watchdog/stm32_iwdg.c index 570a71509d2a..78d51deab87a 100644 --- a/drivers/watchdog/stm32_iwdg.c +++ b/drivers/watchdog/stm32_iwdg.c @@ -21,6 +21,8 @@ #include #include +#define DEFAULT_TIMEOUT 10 + /* IWDG registers */ #define IWDG_KR 0x00 /* Key register */ #define IWDG_PR 0x04 /* Prescaler Register */ @@ -249,6 +251,7 @@ static int stm32_iwdg_probe(struct platform_device *pdev) wdd->parent = dev; wdd->info = &stm32_iwdg_info; wdd->ops = &stm32_iwdg_ops; + wdd->timeout = DEFAULT_TIMEOUT; wdd->min_timeout = DIV_ROUND_UP((RLR_MIN + 1) * PR_MIN, wdt->rate); wdd->max_hw_heartbeat_ms = ((RLR_MAX + 1) * wdt->data->max_prescaler * 1000) / wdt->rate; diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 00f8e349921d..96b96516c980 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -937,8 +937,8 @@ static void shutdown_pirq(struct irq_data *data) return; do_mask(info, EVT_MASK_REASON_EXPLICIT); - xen_evtchn_close(evtchn); xen_irq_info_cleanup(info); + xen_evtchn_close(evtchn); } static void enable_pirq(struct irq_data *data) @@ -982,8 +982,6 @@ static void __unbind_from_irq(unsigned int irq) unsigned int cpu = cpu_from_irq(irq); struct xenbus_device *dev; - xen_evtchn_close(evtchn); - switch (type_from_irq(irq)) { case IRQT_VIRQ: per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; @@ -1001,6 +999,7 @@ static void __unbind_from_irq(unsigned int irq) } xen_irq_info_cleanup(info); + xen_evtchn_close(evtchn); } xen_free_irq(irq); diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c index 940e5e9e8a54..335451309566 100644 --- a/drivers/xen/gntdev-dmabuf.c +++ b/drivers/xen/gntdev-dmabuf.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,7 @@ struct gntdev_dmabuf { /* Number of pages this buffer has. */ int nr_pages; - /* Pages of this buffer. */ + /* Pages of this buffer (only for dma-buf export). */ struct page **pages; }; @@ -484,7 +485,7 @@ out: /* DMA buffer import support. */ static int -dmabuf_imp_grant_foreign_access(struct page **pages, u32 *refs, +dmabuf_imp_grant_foreign_access(unsigned long *gfns, u32 *refs, int count, int domid) { grant_ref_t priv_gref_head; @@ -507,7 +508,7 @@ dmabuf_imp_grant_foreign_access(struct page **pages, u32 *refs, } gnttab_grant_foreign_access_ref(cur_ref, domid, - xen_page_to_gfn(pages[i]), 0); + gfns[i], 0); refs[i] = cur_ref; } @@ -529,7 +530,6 @@ static void dmabuf_imp_end_foreign_access(u32 *refs, int count) static void dmabuf_imp_free_storage(struct gntdev_dmabuf *gntdev_dmabuf) { - kfree(gntdev_dmabuf->pages); kfree(gntdev_dmabuf->u.imp.refs); kfree(gntdev_dmabuf); } @@ -549,12 +549,6 @@ static struct gntdev_dmabuf *dmabuf_imp_alloc_storage(int count) if (!gntdev_dmabuf->u.imp.refs) goto fail; - gntdev_dmabuf->pages = kcalloc(count, - sizeof(gntdev_dmabuf->pages[0]), - GFP_KERNEL); - if (!gntdev_dmabuf->pages) - goto fail; - gntdev_dmabuf->nr_pages = count; for (i = 0; i < count; i++) @@ -576,7 +570,8 @@ dmabuf_imp_to_refs(struct gntdev_dmabuf_priv *priv, struct device *dev, struct dma_buf *dma_buf; struct dma_buf_attachment *attach; struct sg_table *sgt; - struct sg_page_iter sg_iter; + struct sg_dma_page_iter sg_iter; + unsigned long *gfns; int i; dma_buf = dma_buf_get(fd); @@ -624,26 +619,31 @@ dmabuf_imp_to_refs(struct gntdev_dmabuf_priv *priv, struct device *dev, gntdev_dmabuf->u.imp.sgt = sgt; - /* Now convert sgt to array of pages and check for page validity. */ - i = 0; - for_each_sgtable_page(sgt, &sg_iter, 0) { - struct page *page = sg_page_iter_page(&sg_iter); - /* - * Check if page is valid: this can happen if we are given - * a page from VRAM or other resources which are not backed - * by a struct page. - */ - if (!pfn_valid(page_to_pfn(page))) { - ret = ERR_PTR(-EINVAL); - goto fail_unmap; - } - - gntdev_dmabuf->pages[i++] = page; + gfns = kcalloc(count, sizeof(*gfns), GFP_KERNEL); + if (!gfns) { + ret = ERR_PTR(-ENOMEM); + goto fail_unmap; } - ret = ERR_PTR(dmabuf_imp_grant_foreign_access(gntdev_dmabuf->pages, + /* + * Now convert sgt to array of gfns without accessing underlying pages. + * It is not allowed to access the underlying struct page of an sg table + * exported by DMA-buf, but since we deal with special Xen dma device here + * (not a normal physical one) look at the dma addresses in the sg table + * and then calculate gfns directly from them. + */ + i = 0; + for_each_sgtable_dma_page(sgt, &sg_iter, 0) { + dma_addr_t addr = sg_page_iter_dma_address(&sg_iter); + unsigned long pfn = bfn_to_pfn(XEN_PFN_DOWN(dma_to_phys(dev, addr))); + + gfns[i++] = pfn_to_gfn(pfn); + } + + ret = ERR_PTR(dmabuf_imp_grant_foreign_access(gfns, gntdev_dmabuf->u.imp.refs, count, domid)); + kfree(gfns); if (IS_ERR(ret)) goto fail_end_access; diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index bc417da7e9c1..633fe4f527b8 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -46,6 +46,7 @@ struct inode *v9fs_alloc_inode(struct super_block *sb); void v9fs_free_inode(struct inode *inode); struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev); +void v9fs_set_netfs_context(struct inode *inode); int v9fs_init_inode(struct v9fs_session_info *v9ses, struct inode *inode, umode_t mode, dev_t rdev); void v9fs_evict_inode(struct inode *inode); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 4d1a4a8d9277..5e2657c1dbbe 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -250,7 +250,7 @@ void v9fs_free_inode(struct inode *inode) /* * Set parameters for the netfs library */ -static void v9fs_set_netfs_context(struct inode *inode) +void v9fs_set_netfs_context(struct inode *inode) { struct v9fs_inode *v9inode = V9FS_I(inode); netfs_inode_init(&v9inode->netfs, &v9fs_req_ops); @@ -344,8 +344,6 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, err = -EINVAL; goto error; } - - v9fs_set_netfs_context(inode); error: return err; @@ -377,6 +375,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev) iput(inode); return ERR_PTR(err); } + v9fs_set_netfs_context(inode); return inode; } @@ -479,6 +478,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, goto error; v9fs_stat2inode(st, inode, sb, 0); + v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); unlock_new_inode(inode); return inode; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 5cfa4b4f070f..e15ad46833e0 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -130,6 +130,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, goto error; v9fs_stat2inode_dotl(st, inode, 0); + v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); retval = v9fs_get_acl(inode, fid); if (retval) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index a484fa642808..90f9b2a46ff4 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -110,13 +110,14 @@ static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell, { struct afs_volume *volume = NULL; struct rb_node *p; - int seq = 0; + int seq = 1; do { /* Unfortunately, rbtree walking doesn't give reliable results * under just the RCU read lock, so we have to check for * changes. */ + seq++; /* 2 on the 1st/lockless path, otherwise odd */ read_seqbegin_or_lock(&cell->volume_lock, &seq); p = rcu_dereference_raw(cell->volumes.rb_node); diff --git a/fs/afs/server.c b/fs/afs/server.c index b5237206eac3..0bd2f5ba6900 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -27,7 +27,7 @@ struct afs_server *afs_find_server(struct afs_net *net, const struct afs_addr_list *alist; struct afs_server *server = NULL; unsigned int i; - int seq = 0, diff; + int seq = 1, diff; rcu_read_lock(); @@ -35,6 +35,7 @@ struct afs_server *afs_find_server(struct afs_net *net, if (server) afs_unuse_server_notime(net, server, afs_server_trace_put_find_rsq); server = NULL; + seq++; /* 2 on the 1st/lockless path, otherwise odd */ read_seqbegin_or_lock(&net->fs_addr_lock, &seq); if (srx->transport.family == AF_INET6) { @@ -90,7 +91,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu { struct afs_server *server = NULL; struct rb_node *p; - int diff, seq = 0; + int diff, seq = 1; _enter("%pU", uuid); @@ -102,7 +103,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu if (server) afs_unuse_server(net, server, afs_server_trace_put_uuid_rsq); server = NULL; - + seq++; /* 2 on the 1st/lockless path, otherwise odd */ read_seqbegin_or_lock(&net->fs_lock, &seq); p = net->fs_servers.rb_node; diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 1c9144e3e83a..a146d70efa65 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -341,7 +341,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) { struct afs_server_list *new, *old, *discard; struct afs_vldb_entry *vldb; - char idbuf[16]; + char idbuf[24]; int ret, idsz; _enter(""); @@ -349,7 +349,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) /* We look up an ID by passing it as a decimal string in the * operation's name parameter. */ - idsz = sprintf(idbuf, "%llu", volume->vid); + idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid); vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); if (IS_ERR(vldb)) { diff --git a/fs/aio.c b/fs/aio.c index e85ba0b77f59..3e3bf6fdc5ab 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -591,13 +591,24 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) { - struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw); - struct kioctx *ctx = req->ki_ctx; + struct aio_kiocb *req; + struct kioctx *ctx; unsigned long flags; + /* + * kiocb didn't come from aio or is neither a read nor a write, hence + * ignore it. + */ + if (!(iocb->ki_flags & IOCB_AIO_RW)) + return; + + req = container_of(iocb, struct aio_kiocb, rw); + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) return; + ctx = req->ki_ctx; + spin_lock_irqsave(&ctx->ctx_lock, flags); list_add_tail(&req->ki_list, &ctx->active_reqs); req->ki_cancel = cancel; @@ -1476,7 +1487,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) req->ki_complete = aio_complete_rw; req->private = NULL; req->ki_pos = iocb->aio_offset; - req->ki_flags = req->ki_filp->f_iocb_flags; + req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_AIO_RW; if (iocb->aio_flags & IOCB_FLAG_RESFD) req->ki_flags |= IOCB_EVENTFD; if (iocb->aio_flags & IOCB_FLAG_IOPRIO) { diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 08017b180a10..5993b627be58 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1318,6 +1318,7 @@ out: */ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) { + LIST_HEAD(retry_list); struct btrfs_block_group *block_group; struct btrfs_space_info *space_info; struct btrfs_trans_handle *trans; @@ -1339,6 +1340,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->unused_bgs_lock); while (!list_empty(&fs_info->unused_bgs)) { + u64 used; int trimming; block_group = list_first_entry(&fs_info->unused_bgs, @@ -1374,9 +1376,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) goto next; } + spin_lock(&space_info->lock); spin_lock(&block_group->lock); - if (block_group->reserved || block_group->pinned || - block_group->used || block_group->ro || + if (btrfs_is_block_group_used(block_group) || block_group->ro || list_is_singular(&block_group->list)) { /* * We want to bail if we made new allocations or have @@ -1386,10 +1388,50 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) */ trace_btrfs_skip_unused_block_group(block_group); spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); up_write(&space_info->groups_sem); goto next; } + + /* + * The block group may be unused but there may be space reserved + * accounting with the existence of that block group, that is, + * space_info->bytes_may_use was incremented by a task but no + * space was yet allocated from the block group by the task. + * That space may or may not be allocated, as we are generally + * pessimistic about space reservation for metadata as well as + * for data when using compression (as we reserve space based on + * the worst case, when data can't be compressed, and before + * actually attempting compression, before starting writeback). + * + * So check if the total space of the space_info minus the size + * of this block group is less than the used space of the + * space_info - if that's the case, then it means we have tasks + * that might be relying on the block group in order to allocate + * extents, and add back the block group to the unused list when + * we finish, so that we retry later in case no tasks ended up + * needing to allocate extents from the block group. + */ + used = btrfs_space_info_used(space_info, true); + if (space_info->total_bytes - block_group->length < used && + block_group->zone_unusable < block_group->length) { + /* + * Add a reference for the list, compensate for the ref + * drop under the "next" label for the + * fs_info->unused_bgs list. + */ + btrfs_get_block_group(block_group); + list_add_tail(&block_group->bg_list, &retry_list); + + trace_btrfs_skip_unused_block_group(block_group); + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); + up_write(&space_info->groups_sem); + goto next; + } + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); /* We don't want to force the issue, only flip if it's ok. */ ret = inc_block_group_ro(block_group, 0); @@ -1513,12 +1555,16 @@ next: btrfs_put_block_group(block_group); spin_lock(&fs_info->unused_bgs_lock); } + list_splice_tail(&retry_list, &fs_info->unused_bgs); spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); return; flip_async: btrfs_end_transaction(trans); + spin_lock(&fs_info->unused_bgs_lock); + list_splice_tail(&retry_list, &fs_info->unused_bgs); + spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_put_block_group(block_group); btrfs_discard_punt_unused_bgs_list(fs_info); diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 47a2dcbfee25..bace40a00637 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -241,6 +241,13 @@ static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group) return (block_group->start + block_group->length); } +static inline bool btrfs_is_block_group_used(const struct btrfs_block_group *bg) +{ + lockdep_assert_held(&bg->lock); + + return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0); +} + static inline bool btrfs_is_block_group_data_only( struct btrfs_block_group *block_group) { diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index 507b44d18572..4cbf38616620 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -512,7 +512,7 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans, block_rsv = get_block_rsv(trans, root); - if (unlikely(block_rsv->size == 0)) + if (unlikely(btrfs_block_rsv_size(block_rsv) == 0)) goto try_reserve; again: ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize); diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h index 578c3497a455..df87c4949d06 100644 --- a/fs/btrfs/block-rsv.h +++ b/fs/btrfs/block-rsv.h @@ -101,4 +101,36 @@ static inline bool btrfs_block_rsv_full(const struct btrfs_block_rsv *rsv) return data_race(rsv->full); } +/* + * Get the reserved mount of a block reserve in a context where getting a stale + * value is acceptable, instead of accessing it directly and trigger data race + * warning from KCSAN. + */ +static inline u64 btrfs_block_rsv_reserved(struct btrfs_block_rsv *rsv) +{ + u64 ret; + + spin_lock(&rsv->lock); + ret = rsv->reserved; + spin_unlock(&rsv->lock); + + return ret; +} + +/* + * Get the size of a block reserve in a context where getting a stale value is + * acceptable, instead of accessing it directly and trigger data race warning + * from KCSAN. + */ +static inline u64 btrfs_block_rsv_size(struct btrfs_block_rsv *rsv) +{ + u64 ret; + + spin_lock(&rsv->lock); + ret = rsv->size; + spin_unlock(&rsv->lock); + + return ret; +} + #endif /* BTRFS_BLOCK_RSV_H */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 27d06bb5e5c0..cca1acf2e037 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1552,6 +1552,7 @@ struct btrfs_drop_extents_args { struct btrfs_file_private { void *filldir_buf; + u64 last_index; }; diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index f2bc5563c0f9..63b7fa706743 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -243,7 +243,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 reserve_size = 0; u64 qgroup_rsv_size = 0; - u64 csum_leaves; unsigned outstanding_extents; lockdep_assert_held(&inode->lock); @@ -258,10 +257,12 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, outstanding_extents); reserve_size += btrfs_calc_metadata_size(fs_info, 1); } - csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, - inode->csum_bytes); - reserve_size += btrfs_calc_insert_metadata_size(fs_info, - csum_leaves); + if (!(inode->flags & BTRFS_INODE_NODATASUM)) { + u64 csum_leaves; + + csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); + reserve_size += btrfs_calc_insert_metadata_size(fs_info, csum_leaves); + } /* * For qgroup rsv, the calculation is very simple: * account one nodesize for each outstanding extent @@ -276,14 +277,20 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, spin_unlock(&block_rsv->lock); } -static void calc_inode_reservations(struct btrfs_fs_info *fs_info, +static void calc_inode_reservations(struct btrfs_inode *inode, u64 num_bytes, u64 disk_num_bytes, u64 *meta_reserve, u64 *qgroup_reserve) { + struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 nr_extents = count_max_extents(fs_info, num_bytes); - u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes); + u64 csum_leaves; u64 inode_update = btrfs_calc_metadata_size(fs_info, 1); + if (inode->flags & BTRFS_INODE_NODATASUM) + csum_leaves = 0; + else + csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes); + *meta_reserve = btrfs_calc_insert_metadata_size(fs_info, nr_extents + csum_leaves); @@ -335,7 +342,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, * everything out and try again, which is bad. This way we just * over-reserve slightly, and clean up the mess when we are done. */ - calc_inode_reservations(fs_info, num_bytes, disk_num_bytes, + calc_inode_reservations(inode, num_bytes, disk_num_bytes, &meta_reserve, &qgroup_reserve); ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true, noflush); @@ -356,7 +363,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, spin_lock(&inode->lock); nr_extents = count_max_extents(fs_info, num_bytes); btrfs_mod_outstanding_extents(inode, nr_extents); - inode->csum_bytes += disk_num_bytes; + if (!(inode->flags & BTRFS_INODE_NODATASUM)) + inode->csum_bytes += disk_num_bytes; btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); @@ -390,7 +398,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, num_bytes = ALIGN(num_bytes, fs_info->sectorsize); spin_lock(&inode->lock); - inode->csum_bytes -= num_bytes; + if (!(inode->flags & BTRFS_INODE_NODATASUM)) + inode->csum_bytes -= num_bytes; btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 1331e56e8e84..c6426080cf0a 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1665,6 +1665,7 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode) } bool btrfs_readdir_get_delayed_items(struct inode *inode, + u64 last_index, struct list_head *ins_list, struct list_head *del_list) { @@ -1684,14 +1685,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode, mutex_lock(&delayed_node->mutex); item = __btrfs_first_delayed_insertion_item(delayed_node); - while (item) { + while (item && item->index <= last_index) { refcount_inc(&item->refs); list_add_tail(&item->readdir_list, ins_list); item = __btrfs_next_delayed_item(item); } item = __btrfs_first_delayed_deletion_item(delayed_node); - while (item) { + while (item && item->index <= last_index) { refcount_inc(&item->refs); list_add_tail(&item->readdir_list, del_list); item = __btrfs_next_delayed_item(item); diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 0163ca637a96..fc6fae9640f8 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -148,6 +148,7 @@ void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info); /* Used for readdir() */ bool btrfs_readdir_get_delayed_items(struct inode *inode, + u64 last_index, struct list_head *ins_list, struct list_head *del_list); void btrfs_readdir_put_delayed_items(struct inode *inode, diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 61e58066b5fd..9c856a73d533 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -740,6 +740,23 @@ leave: return ret; } +static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args) +{ + if (args->start.srcdevid == 0) { + if (memchr(args->start.srcdev_name, 0, + sizeof(args->start.srcdev_name)) == NULL) + return -ENAMETOOLONG; + } else { + args->start.srcdev_name[0] = 0; + } + + if (memchr(args->start.tgtdev_name, 0, + sizeof(args->start.tgtdev_name)) == NULL) + return -ENAMETOOLONG; + + return 0; +} + int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args) { @@ -752,10 +769,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, default: return -EINVAL; } - - if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || - args->start.tgtdev_name[0] == '\0') - return -EINVAL; + ret = btrfs_check_replace_dev_names(args); + if (ret < 0) + return ret; ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name, args->start.srcdevid, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 40152458e7b7..5756edb37c61 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1643,12 +1643,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) * * @objectid: root id * @anon_dev: preallocated anonymous block device number for new roots, - * pass 0 for new allocation. + * pass NULL for a new allocation. * @check_ref: whether to check root item references, If true, return -ENOENT * for orphan roots */ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev, + u64 objectid, dev_t *anon_dev, bool check_ref) { struct btrfs_root *root; @@ -1662,8 +1662,17 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { - /* Shouldn't get preallocated anon_dev for cached roots */ - ASSERT(!anon_dev); + /* + * Some other caller may have read out the newly inserted + * subvolume already (for things like backref walk etc). Not + * that common but still possible. In that case, we just need + * to free the anon_dev. + */ + if (unlikely(anon_dev && *anon_dev)) { + free_anon_bdev(*anon_dev); + *anon_dev = 0; + } + if (check_ref && btrfs_root_refs(&root->root_item) == 0) { btrfs_put_root(root); return ERR_PTR(-ENOENT); @@ -1683,7 +1692,7 @@ again: goto fail; } - ret = btrfs_init_fs_root(root, anon_dev); + ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0); if (ret) goto fail; @@ -1719,7 +1728,7 @@ fail: * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() * and once again by our caller. */ - if (anon_dev) + if (anon_dev && *anon_dev) root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); @@ -1735,7 +1744,7 @@ fail: struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref) { - return btrfs_get_root_ref(fs_info, objectid, 0, check_ref); + return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref); } /* @@ -1743,11 +1752,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, * the anonymous block device id * * @objectid: tree objectid - * @anon_dev: if zero, allocate a new anonymous block device or use the - * parameter value + * @anon_dev: if NULL, allocate a new anonymous block device or use the + * parameter value if not NULL */ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev) + u64 objectid, dev_t *anon_dev) { return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 7322af63c0cc..24bddca86e9c 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -65,7 +65,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref); struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev); + u64 objectid, dev_t *anon_dev); struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 objectid); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2a7c9088fe1f..0d2cc186974d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1209,7 +1209,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, u64 bytes_left, end; u64 aligned_start = ALIGN(start, 1 << 9); - if (WARN_ON(start != aligned_start)) { + /* Adjust the range to be aligned to 512B sectors if necessary. */ + if (start != aligned_start) { len -= aligned_start - start; len = round_down(len, 1 << 9); start = aligned_start; @@ -4206,6 +4207,42 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info, return 0; } +static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info, + struct find_free_extent_ctl *ffe_ctl) +{ + if (ffe_ctl->for_treelog) { + spin_lock(&fs_info->treelog_bg_lock); + if (fs_info->treelog_bg) + ffe_ctl->hint_byte = fs_info->treelog_bg; + spin_unlock(&fs_info->treelog_bg_lock); + } else if (ffe_ctl->for_data_reloc) { + spin_lock(&fs_info->relocation_bg_lock); + if (fs_info->data_reloc_bg) + ffe_ctl->hint_byte = fs_info->data_reloc_bg; + spin_unlock(&fs_info->relocation_bg_lock); + } else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) { + struct btrfs_block_group *block_group; + + spin_lock(&fs_info->zone_active_bgs_lock); + list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) { + /* + * No lock is OK here because avail is monotinically + * decreasing, and this is just a hint. + */ + u64 avail = block_group->zone_capacity - block_group->alloc_offset; + + if (block_group_bits(block_group, ffe_ctl->flags) && + avail >= ffe_ctl->num_bytes) { + ffe_ctl->hint_byte = block_group->start; + break; + } + } + spin_unlock(&fs_info->zone_active_bgs_lock); + } + + return 0; +} + static int prepare_allocation(struct btrfs_fs_info *fs_info, struct find_free_extent_ctl *ffe_ctl, struct btrfs_space_info *space_info, @@ -4216,19 +4253,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info, return prepare_allocation_clustered(fs_info, ffe_ctl, space_info, ins); case BTRFS_EXTENT_ALLOC_ZONED: - if (ffe_ctl->for_treelog) { - spin_lock(&fs_info->treelog_bg_lock); - if (fs_info->treelog_bg) - ffe_ctl->hint_byte = fs_info->treelog_bg; - spin_unlock(&fs_info->treelog_bg_lock); - } - if (ffe_ctl->for_data_reloc) { - spin_lock(&fs_info->relocation_bg_lock); - if (fs_info->data_reloc_bg) - ffe_ctl->hint_byte = fs_info->data_reloc_bg; - spin_unlock(&fs_info->relocation_bg_lock); - } - return 0; + return prepare_allocation_zoned(fs_info, ffe_ctl); default: BUG(); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9a7d77c410e2..f7f4bcc09464 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3364,8 +3364,23 @@ out: unwritten_start += logical_len; clear_extent_uptodate(io_tree, unwritten_start, end, NULL); - /* Drop extent maps for the part of the extent we didn't write. */ - btrfs_drop_extent_map_range(inode, unwritten_start, end, false); + /* + * Drop extent maps for the part of the extent we didn't write. + * + * We have an exception here for the free_space_inode, this is + * because when we do btrfs_get_extent() on the free space inode + * we will search the commit root. If this is a new block group + * we won't find anything, and we will trip over the assert in + * writepage where we do ASSERT(em->block_start != + * EXTENT_MAP_HOLE). + * + * Theoretically we could also skip this for any NOCOW extent as + * we don't mess with the extent map tree in the NOCOW case, but + * for now simply skip this if we are the free space inode. + */ + if (!btrfs_is_free_space_inode(inode)) + btrfs_drop_extent_map_range(inode, unwritten_start, + end, false); /* * If the ordered extent had an IOERR or something else went @@ -4689,6 +4704,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) u64 root_flags; int ret; + down_write(&fs_info->subvol_sem); + /* * Don't allow to delete a subvolume with send in progress. This is * inside the inode lock so the error handling that has to drop the bit @@ -4700,25 +4717,25 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) btrfs_warn(fs_info, "attempt to delete subvolume %llu during send", dest->root_key.objectid); - return -EPERM; + ret = -EPERM; + goto out_up_write; } if (atomic_read(&dest->nr_swapfiles)) { spin_unlock(&dest->root_item_lock); btrfs_warn(fs_info, "attempt to delete subvolume %llu with active swapfile", root->root_key.objectid); - return -EPERM; + ret = -EPERM; + goto out_up_write; } root_flags = btrfs_root_flags(&dest->root_item); btrfs_set_root_flags(&dest->root_item, root_flags | BTRFS_ROOT_SUBVOL_DEAD); spin_unlock(&dest->root_item_lock); - down_write(&fs_info->subvol_sem); - ret = may_destroy_subvol(dest); if (ret) - goto out_up_write; + goto out_undead; btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); /* @@ -4728,7 +4745,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) */ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true); if (ret) - goto out_up_write; + goto out_undead; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { @@ -4794,15 +4811,17 @@ out_end_trans: inode->i_flags |= S_DEAD; out_release: btrfs_subvolume_release_metadata(root, &block_rsv); -out_up_write: - up_write(&fs_info->subvol_sem); +out_undead: if (ret) { spin_lock(&dest->root_item_lock); root_flags = btrfs_root_flags(&dest->root_item); btrfs_set_root_flags(&dest->root_item, root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); spin_unlock(&dest->root_item_lock); - } else { + } +out_up_write: + up_write(&fs_info->subvol_sem); + if (!ret) { d_invalidate(dentry); btrfs_prune_dentries(dest); ASSERT(dest->send_in_progress == 0); @@ -5948,6 +5967,78 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } +/* + * Find the highest existing sequence number in a directory and then set the + * in-memory index_cnt variable to the first free sequence number. + */ +static int btrfs_set_inode_index_count(struct btrfs_inode *inode) +{ + struct btrfs_root *root = inode->root; + struct btrfs_key key, found_key; + struct btrfs_path *path; + struct extent_buffer *leaf; + int ret; + + key.objectid = btrfs_ino(inode); + key.type = BTRFS_DIR_INDEX_KEY; + key.offset = (u64)-1; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + /* FIXME: we should be able to handle this */ + if (ret == 0) + goto out; + ret = 0; + + if (path->slots[0] == 0) { + inode->index_cnt = BTRFS_DIR_START_INDEX; + goto out; + } + + path->slots[0]--; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != btrfs_ino(inode) || + found_key.type != BTRFS_DIR_INDEX_KEY) { + inode->index_cnt = BTRFS_DIR_START_INDEX; + goto out; + } + + inode->index_cnt = found_key.offset + 1; +out: + btrfs_free_path(path); + return ret; +} + +static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) +{ + int ret = 0; + + btrfs_inode_lock(&dir->vfs_inode, 0); + if (dir->index_cnt == (u64)-1) { + ret = btrfs_inode_delayed_dir_index_count(dir); + if (ret) { + ret = btrfs_set_inode_index_count(dir); + if (ret) + goto out; + } + } + + /* index_cnt is the index number of next new entry, so decrement it. */ + *index = dir->index_cnt - 1; +out: + btrfs_inode_unlock(&dir->vfs_inode, 0); + + return ret; +} + /* * All this infrastructure exists because dir_emit can fault, and we are holding * the tree lock when doing readdir. For now just allocate a buffer and copy @@ -5960,10 +6051,17 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, static int btrfs_opendir(struct inode *inode, struct file *file) { struct btrfs_file_private *private; + u64 last_index; + int ret; + + ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index); + if (ret) + return ret; private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL); if (!private) return -ENOMEM; + private->last_index = last_index; private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!private->filldir_buf) { kfree(private); @@ -5973,6 +6071,19 @@ static int btrfs_opendir(struct inode *inode, struct file *file) return 0; } +static loff_t btrfs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct btrfs_file_private *private = file->private_data; + int ret; + + ret = btrfs_get_dir_last_index(BTRFS_I(file_inode(file)), + &private->last_index); + if (ret) + return ret; + + return generic_file_llseek(file, offset, whence); +} + struct dir_entry { u64 ino; u64 offset; @@ -6030,7 +6141,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) INIT_LIST_HEAD(&ins_list); INIT_LIST_HEAD(&del_list); - put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list); + put = btrfs_readdir_get_delayed_items(inode, private->last_index, + &ins_list, &del_list); again: key.type = BTRFS_DIR_INDEX_KEY; @@ -6047,6 +6159,8 @@ again: break; if (found_key.offset < ctx->pos) continue; + if (found_key.offset > private->last_index) + break; if (btrfs_should_delete_dir_index(&del_list, found_key.offset)) continue; di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); @@ -6182,57 +6296,6 @@ static int btrfs_update_time(struct inode *inode, struct timespec64 *now, return dirty ? btrfs_dirty_inode(inode) : 0; } -/* - * find the highest existing sequence number in a directory - * and then set the in-memory index_cnt variable to reflect - * free sequence numbers - */ -static int btrfs_set_inode_index_count(struct btrfs_inode *inode) -{ - struct btrfs_root *root = inode->root; - struct btrfs_key key, found_key; - struct btrfs_path *path; - struct extent_buffer *leaf; - int ret; - - key.objectid = btrfs_ino(inode); - key.type = BTRFS_DIR_INDEX_KEY; - key.offset = (u64)-1; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out; - /* FIXME: we should be able to handle this */ - if (ret == 0) - goto out; - ret = 0; - - if (path->slots[0] == 0) { - inode->index_cnt = BTRFS_DIR_START_INDEX; - goto out; - } - - path->slots[0]--; - - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - if (found_key.objectid != btrfs_ino(inode) || - found_key.type != BTRFS_DIR_INDEX_KEY) { - inode->index_cnt = BTRFS_DIR_START_INDEX; - goto out; - } - - inode->index_cnt = found_key.offset + 1; -out: - btrfs_free_path(path); - return ret; -} - /* * helper to find a free sequence number in a given directory. This current * code is very simple, later versions will do smarter things in the btree @@ -10726,6 +10789,13 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, if (encoded->encryption != BTRFS_ENCODED_IO_ENCRYPTION_NONE) return -EINVAL; + /* + * Compressed extents should always have checksums, so error out if we + * have a NOCOW file or inode was created while mounted with NODATASUM. + */ + if (inode->flags & BTRFS_INODE_NODATASUM) + return -EINVAL; + orig_count = iov_iter_count(from); /* The extent size must be sane. */ @@ -11401,7 +11471,7 @@ static const struct inode_operations btrfs_dir_inode_operations = { }; static const struct file_operations btrfs_dir_file_operations = { - .llseek = generic_file_llseek, + .llseek = btrfs_dir_llseek, .read = generic_read_dir, .iterate_shared = btrfs_real_readdir, .open = btrfs_opendir, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e8e4781c48a5..64b37afb7c87 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -708,7 +708,7 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, free_extent_buffer(leaf); leaf = NULL; - new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev); + new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev); if (IS_ERR(new_root)) { ret = PTR_ERR(new_root); btrfs_abort_transaction(trans, ret); @@ -780,6 +780,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, return -EOPNOTSUPP; } + if (btrfs_root_refs(&root->root_item) == 0) + return -ENOENT; + if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) return -EINVAL; @@ -3500,6 +3503,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) ret = -EFAULT; goto out; } + if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) { + ret = -EOPNOTSUPP; + goto out; + } /* compression requires us to start the IO */ if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { range.flags |= BTRFS_DEFRAG_RANGE_START_IO; @@ -4688,6 +4695,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) goto out; } + if (sa->create && is_fstree(sa->qgroupid)) { + ret = -EINVAL; + goto out; + } + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 96ec9ccc2ef6..c14d4f70e84b 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1635,6 +1635,15 @@ out: return ret; } +static bool qgroup_has_usage(struct btrfs_qgroup *qgroup) +{ + return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 || + qgroup->excl > 0 || qgroup->excl_cmpr > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0); +} + int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -1654,6 +1663,11 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) goto out; } + if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) { + ret = -EBUSY; + goto out; + } + /* Check if there are no children of this qgroup */ if (!list_empty(&qgroup->members)) { ret = -EBUSY; @@ -2786,11 +2800,6 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) goto cleanup; } - /* Free the reserved data space */ - btrfs_qgroup_free_refroot(fs_info, - record->data_rsv_refroot, - record->data_rsv, - BTRFS_QGROUP_RSV_DATA); /* * Use BTRFS_SEQ_LAST as time_seq to do special search, * which doesn't lock tree or delayed_refs and search @@ -2812,6 +2821,11 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) record->old_roots = NULL; new_roots = NULL; } + /* Free the reserved data space */ + btrfs_qgroup_free_refroot(fs_info, + record->data_rsv_refroot, + record->data_rsv, + BTRFS_QGROUP_RSV_DATA); cleanup: ulist_free(record->old_roots); ulist_free(new_roots); diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 4b052d4009d3..8083fe866d2b 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -883,8 +883,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, out_unlock: spin_unlock(&fs_info->ref_verify_lock); out: - if (ret) + if (ret) { + btrfs_free_ref_cache(fs_info); btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); + } return ret; } @@ -1015,8 +1017,8 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) } } if (ret) { - btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); btrfs_free_ref_cache(fs_info); + btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); } btrfs_free_path(path); return ret; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 1672d4846baa..12a2b1e3f1e3 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4177,7 +4177,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, gen = fs_info->last_trans_committed; for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { - bytenr = btrfs_sb_offset(i); + ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr); + if (ret == -ENOENT) + break; + + if (ret) { + spin_lock(&sctx->stat_lock); + sctx->stat.super_errors++; + spin_unlock(&sctx->stat_lock); + continue; + } + if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->commit_total_bytes) break; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4a4d65b5e24f..9f7ffd9ef6fd 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6462,11 +6462,20 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) if (ret) goto out; } - if (sctx->cur_inode_last_extent < - sctx->cur_inode_size) { - ret = send_hole(sctx, sctx->cur_inode_size); - if (ret) + if (sctx->cur_inode_last_extent < sctx->cur_inode_size) { + ret = range_is_hole_in_parent(sctx, + sctx->cur_inode_last_extent, + sctx->cur_inode_size); + if (ret < 0) { goto out; + } else if (ret == 0) { + ret = send_hole(sctx, sctx->cur_inode_size); + if (ret < 0) + goto out; + } else { + /* Range is already a hole, skip. */ + ret = 0; + } } } if (need_truncate) { @@ -7852,7 +7861,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) } if (arg->flags & ~BTRFS_SEND_FLAG_MASK) { - ret = -EINVAL; + ret = -EOPNOTSUPP; goto out; } diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 2635fb4bffa0..8b75f436a9a3 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -847,7 +847,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info) { - u64 global_rsv_size = fs_info->global_block_rsv.reserved; + const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv); u64 ordered, delalloc; u64 total = writable_total_bytes(fs_info, space_info); u64 thresh; @@ -948,8 +948,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, ordered = percpu_counter_read_positive(&fs_info->ordered_bytes) >> 1; delalloc = percpu_counter_read_positive(&fs_info->delalloc_bytes); if (ordered >= delalloc) - used += fs_info->delayed_refs_rsv.reserved + - fs_info->delayed_block_rsv.reserved; + used += btrfs_block_rsv_reserved(&fs_info->delayed_refs_rsv) + + btrfs_block_rsv_reserved(&fs_info->delayed_block_rsv); else used += space_info->bytes_may_use - global_rsv_size; @@ -1164,7 +1164,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) enum btrfs_flush_state flush; u64 delalloc_size = 0; u64 to_reclaim, block_rsv_size; - u64 global_rsv_size = global_rsv->reserved; + const u64 global_rsv_size = btrfs_block_rsv_reserved(global_rsv); loops++; @@ -1176,9 +1176,9 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) * assume it's tied up in delalloc reservations. */ block_rsv_size = global_rsv_size + - delayed_block_rsv->reserved + - delayed_refs_rsv->reserved + - trans_rsv->reserved; + btrfs_block_rsv_reserved(delayed_block_rsv) + + btrfs_block_rsv_reserved(delayed_refs_rsv) + + btrfs_block_rsv_reserved(trans_rsv); if (block_rsv_size < space_info->bytes_may_use) delalloc_size = space_info->bytes_may_use - block_rsv_size; @@ -1198,16 +1198,16 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) to_reclaim = delalloc_size; flush = FLUSH_DELALLOC; } else if (space_info->bytes_pinned > - (delayed_block_rsv->reserved + - delayed_refs_rsv->reserved)) { + (btrfs_block_rsv_reserved(delayed_block_rsv) + + btrfs_block_rsv_reserved(delayed_refs_rsv))) { to_reclaim = space_info->bytes_pinned; flush = COMMIT_TRANS; - } else if (delayed_block_rsv->reserved > - delayed_refs_rsv->reserved) { - to_reclaim = delayed_block_rsv->reserved; + } else if (btrfs_block_rsv_reserved(delayed_block_rsv) > + btrfs_block_rsv_reserved(delayed_refs_rsv)) { + to_reclaim = btrfs_block_rsv_reserved(delayed_block_rsv); flush = FLUSH_DELAYED_ITEMS_NR; } else { - to_reclaim = delayed_refs_rsv->reserved; + to_reclaim = btrfs_block_rsv_reserved(delayed_refs_rsv); flush = FLUSH_DELAYED_REFS_NR; } diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 74fef1f49c35..fc468d1079c2 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1704,6 +1704,10 @@ static ssize_t btrfs_devinfo_scrub_speed_max_store(struct kobject *kobj, unsigned long long limit; limit = memparse(buf, &endptr); + /* There could be trailing '\n', also catch any typos after the value. */ + endptr = skip_spaces(endptr); + if (*endptr != 0) + return -EINVAL; WRITE_ONCE(device->scrub_speed_max, limit); return len; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 60db4c3b82fa..b172091f4261 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1809,7 +1809,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } key.offset = (u64)-1; - pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev); + pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev); if (IS_ERR(pending->snap)) { ret = PTR_ERR(pending->snap); pending->snap = NULL; diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 2b39c7f9226f..02e8398246ae 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1399,7 +1399,7 @@ static int check_extent_item(struct extent_buffer *leaf, if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) { extent_err(leaf, slot, "inline ref item overflows extent item, ptr %lu iref size %u end %lu", - ptr, inline_type, end); + ptr, btrfs_extent_inline_ref_size(inline_type), end); return -EUCLEAN; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6fc2d99270c1..03cfb425ea4e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1444,7 +1444,7 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start, if (in_range(physical_start, *start, len) || in_range(*start, physical_start, - physical_end - physical_start)) { + physical_end + 1 - physical_start)) { *start = physical_end + 1; return true; } diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c index 7077f72e6f47..f449f7340aad 100644 --- a/fs/cachefiles/cache.c +++ b/fs/cachefiles/cache.c @@ -168,6 +168,8 @@ error_unsupported: dput(root); error_open_root: cachefiles_end_secure(cache, saved_cred); + put_cred(cache->cache_cred); + cache->cache_cred = NULL; error_getsec: fscache_relinquish_cache(cache_cookie); cache->cache = NULL; diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index aa4efcabb5e3..5f4df9588620 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -805,6 +805,7 @@ static void cachefiles_daemon_unbind(struct cachefiles_cache *cache) cachefiles_put_directory(cache->graveyard); cachefiles_put_directory(cache->store); mntput(cache->mnt); + put_cred(cache->cache_cred); kfree(cache->rootdirname); kfree(cache->secctx); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 36052a362683..57603782e7e2 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1391,7 +1391,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, if (flushing & CEPH_CAP_XATTR_EXCL) { arg->old_xattr_buf = __ceph_build_xattrs_blob(ci); arg->xattr_version = ci->i_xattrs.version; - arg->xattr_buf = ci->i_xattrs.blob; + arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob); } else { arg->xattr_buf = NULL; arg->old_xattr_buf = NULL; @@ -1457,6 +1457,7 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci) encode_cap_msg(msg, arg); ceph_con_send(&arg->session->s_con, msg); ceph_buffer_put(arg->old_xattr_buf); + ceph_buffer_put(arg->xattr_buf); if (arg->wake) wake_up_all(&ci->i_cap_wq); } @@ -4597,12 +4598,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, struct inode *dir, int mds, int drop, int unless) { - struct dentry *parent = NULL; struct ceph_mds_request_release *rel = *p; struct ceph_dentry_info *di = ceph_dentry(dentry); int force = 0; int ret; + /* This shouldn't happen */ + BUG_ON(!dir); + /* * force an record for the directory caps if we have a dentry lease. * this is racy (can't take i_ceph_lock and d_lock together), but it @@ -4612,14 +4615,9 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, spin_lock(&dentry->d_lock); if (di->lease_session && di->lease_session->s_mds == mds) force = 1; - if (!dir) { - parent = dget(dentry->d_parent); - dir = d_inode(parent); - } spin_unlock(&dentry->d_lock); ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force); - dput(parent); spin_lock(&dentry->d_lock); if (ret && di->lease_session && di->lease_session->s_mds == mds) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 82874be94524..da9fcf48ab6c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3650,11 +3650,11 @@ static void handle_session(struct ceph_mds_session *session, if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) pr_info("mds%d reconnect success\n", session->s_mds); + session->s_features = features; if (session->s_state == CEPH_MDS_SESSION_OPEN) { pr_notice("mds%d is already opened\n", session->s_mds); } else { session->s_state = CEPH_MDS_SESSION_OPEN; - session->s_features = features; renewed_caps(mdsc, session, 0); if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features)) diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 3fbabc98e1f7..4a089d70ebd0 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -379,10 +379,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) ceph_decode_skip_8(p, end, bad_ext); /* required_client_features */ ceph_decode_skip_set(p, end, 64, bad_ext); + /* bal_rank_mask */ + ceph_decode_skip_string(p, end, bad_ext); + } + if (mdsmap_ev >= 18) { ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext); - } else { - /* This forces the usage of the (sync) SETXATTR Op */ - m->m_max_xattr_size = 0; } bad_ext: dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index f7fcf7f08ec6..ca4932e6f71b 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -194,10 +194,10 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc) } /* - * This function walks through the snaprealm for an inode and returns the - * ceph_snap_realm for the first snaprealm that has quotas set (max_files, + * This function walks through the snaprealm for an inode and set the + * realmp with the first snaprealm that has quotas set (max_files, * max_bytes, or any, depending on the 'which_quota' argument). If the root is - * reached, return the root ceph_snap_realm instead. + * reached, set the realmp with the root ceph_snap_realm instead. * * Note that the caller is responsible for calling ceph_put_snap_realm() on the * returned realm. @@ -208,18 +208,19 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc) * this function will return -EAGAIN; otherwise, the snaprealms walk-through * will be restarted. */ -static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, - struct inode *inode, - enum quota_get_realm which_quota, - bool retry) +static int get_quota_realm(struct ceph_mds_client *mdsc, struct inode *inode, + enum quota_get_realm which_quota, + struct ceph_snap_realm **realmp, bool retry) { struct ceph_inode_info *ci = NULL; struct ceph_snap_realm *realm, *next; struct inode *in; bool has_quota; + if (realmp) + *realmp = NULL; if (ceph_snap(inode) != CEPH_NOSNAP) - return NULL; + return 0; restart: realm = ceph_inode(inode)->i_snap_realm; @@ -245,7 +246,7 @@ restart: break; ceph_put_snap_realm(mdsc, realm); if (!retry) - return ERR_PTR(-EAGAIN); + return -EAGAIN; goto restart; } @@ -254,8 +255,11 @@ restart: iput(in); next = realm->parent; - if (has_quota || !next) - return realm; + if (has_quota || !next) { + if (realmp) + *realmp = realm; + return 0; + } ceph_get_snap_realm(mdsc, next); ceph_put_snap_realm(mdsc, realm); @@ -264,7 +268,7 @@ restart: if (realm) ceph_put_snap_realm(mdsc, realm); - return NULL; + return 0; } bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) @@ -272,6 +276,7 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb); struct ceph_snap_realm *old_realm, *new_realm; bool is_same; + int ret; restart: /* @@ -281,9 +286,9 @@ restart: * dropped and we can then restart the whole operation. */ down_read(&mdsc->snap_rwsem); - old_realm = get_quota_realm(mdsc, old, QUOTA_GET_ANY, true); - new_realm = get_quota_realm(mdsc, new, QUOTA_GET_ANY, false); - if (PTR_ERR(new_realm) == -EAGAIN) { + get_quota_realm(mdsc, old, QUOTA_GET_ANY, &old_realm, true); + ret = get_quota_realm(mdsc, new, QUOTA_GET_ANY, &new_realm, false); + if (ret == -EAGAIN) { up_read(&mdsc->snap_rwsem); if (old_realm) ceph_put_snap_realm(mdsc, old_realm); @@ -485,8 +490,8 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) bool is_updated = false; down_read(&mdsc->snap_rwsem); - realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), - QUOTA_GET_MAX_BYTES, true); + get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES, + &realm, true); up_read(&mdsc->snap_rwsem); if (!realm) return false; diff --git a/fs/dcache.c b/fs/dcache.c index 52e6d5fdab6b..b09bc88dbbec 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -787,12 +787,12 @@ static inline bool fast_dput(struct dentry *dentry) */ if (unlikely(ret < 0)) { spin_lock(&dentry->d_lock); - if (dentry->d_lockref.count > 1) { - dentry->d_lockref.count--; + if (WARN_ON_ONCE(dentry->d_lockref.count <= 0)) { spin_unlock(&dentry->d_lock); return true; } - return false; + dentry->d_lockref.count--; + goto locked; } /* @@ -850,6 +850,7 @@ static inline bool fast_dput(struct dentry *dentry) * else could have killed it and marked it dead. Either way, we * don't need to do anything else. */ +locked: if (dentry->d_lockref.count) { spin_unlock(&dentry->d_lock); return true; diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 6ed09edabea0..2c797eb519da 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -174,7 +174,7 @@ static LIST_HEAD(dlm_node_addrs); static DEFINE_SPINLOCK(dlm_node_addrs_spin); static struct listen_connection listen_con; -static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; +static struct sockaddr_storage dlm_local_addr[DLM_MAX_ADDR_COUNT]; static int dlm_local_count; int dlm_allow_conn; @@ -398,7 +398,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, if (!sa_out) return 0; - if (dlm_local_addr[0]->ss_family == AF_INET) { + if (dlm_local_addr[0].ss_family == AF_INET) { struct sockaddr_in *in4 = (struct sockaddr_in *) &sas; struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out; ret4->sin_addr.s_addr = in4->sin_addr.s_addr; @@ -727,7 +727,7 @@ static void add_sock(struct socket *sock, struct connection *con) static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, int *addr_len) { - saddr->ss_family = dlm_local_addr[0]->ss_family; + saddr->ss_family = dlm_local_addr[0].ss_family; if (saddr->ss_family == AF_INET) { struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr; in4_addr->sin_port = cpu_to_be16(port); @@ -1167,7 +1167,7 @@ static int sctp_bind_addrs(struct socket *sock, uint16_t port) int i, addr_len, result = 0; for (i = 0; i < dlm_local_count; i++) { - memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr)); + memcpy(&localaddr, &dlm_local_addr[i], sizeof(localaddr)); make_sockaddr(&localaddr, port, &addr_len); if (!i) @@ -1187,7 +1187,7 @@ static int sctp_bind_addrs(struct socket *sock, uint16_t port) /* Get local addresses */ static void init_local(void) { - struct sockaddr_storage sas, *addr; + struct sockaddr_storage sas; int i; dlm_local_count = 0; @@ -1195,21 +1195,10 @@ static void init_local(void) if (dlm_our_addr(&sas, i)) break; - addr = kmemdup(&sas, sizeof(*addr), GFP_NOFS); - if (!addr) - break; - dlm_local_addr[dlm_local_count++] = addr; + memcpy(&dlm_local_addr[dlm_local_count++], &sas, sizeof(sas)); } } -static void deinit_local(void) -{ - int i; - - for (i = 0; i < dlm_local_count; i++) - kfree(dlm_local_addr[i]); -} - static struct writequeue_entry *new_writequeue_entry(struct connection *con) { struct writequeue_entry *entry; @@ -1575,7 +1564,7 @@ static void dlm_connect(struct connection *con) } /* Create a socket to communicate with */ - result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + result = sock_create_kern(&init_net, dlm_local_addr[0].ss_family, SOCK_STREAM, dlm_proto_ops->proto, &sock); if (result < 0) goto socket_err; @@ -1786,7 +1775,6 @@ void dlm_lowcomms_stop(void) foreach_conn(free_conn); srcu_read_unlock(&connections_srcu, idx); work_stop(); - deinit_local(); dlm_proto_ops = NULL; } @@ -1803,7 +1791,7 @@ static int dlm_listen_for_all(void) if (result < 0) return result; - result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + result = sock_create_kern(&init_net, dlm_local_addr[0].ss_family, SOCK_STREAM, dlm_proto_ops->proto, &sock); if (result < 0) { log_print("Can't create comms socket: %d", result); @@ -1842,11 +1830,11 @@ static int dlm_tcp_bind(struct socket *sock) /* Bind to our cluster-known address connecting to avoid * routing problems. */ - memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr)); + memcpy(&src_addr, &dlm_local_addr[0], sizeof(src_addr)); make_sockaddr(&src_addr, 0, &addr_len); - result = sock->ops->bind(sock, (struct sockaddr *)&src_addr, - addr_len); + result = kernel_bind(sock, (struct sockaddr *)&src_addr, + addr_len); if (result < 0) { /* This *may* not indicate a critical error */ log_print("could not bind for connect: %d", result); @@ -1860,7 +1848,7 @@ static int dlm_tcp_connect(struct connection *con, struct socket *sock, { int ret; - ret = sock->ops->connect(sock, addr, addr_len, O_NONBLOCK); + ret = kernel_connect(sock, addr, addr_len, O_NONBLOCK); switch (ret) { case -EINPROGRESS: fallthrough; @@ -1899,9 +1887,9 @@ static int dlm_tcp_listen_bind(struct socket *sock) int addr_len; /* Bind to our port */ - make_sockaddr(dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len); - return sock->ops->bind(sock, (struct sockaddr *)dlm_local_addr[0], - addr_len); + make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len); + return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0], + addr_len); } static const struct dlm_proto_ops dlm_tcp_ops = { @@ -1928,12 +1916,12 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock, int ret; /* - * Make sock->ops->connect() function return in specified time, + * Make kernel_connect() function return in specified time, * since O_NONBLOCK argument in connect() function does not work here, * then, we should restore the default value of this attribute. */ sock_set_sndtimeo(sock->sk, 5); - ret = sock->ops->connect(sock, addr, addr_len, 0); + ret = kernel_connect(sock, addr, addr_len, 0); sock_set_sndtimeo(sock->sk, 0); if (ret < 0) return ret; @@ -1992,7 +1980,7 @@ int dlm_lowcomms_start(void) error = work_start(); if (error) - goto fail_local; + goto fail; dlm_allow_conn = 1; @@ -2022,8 +2010,6 @@ fail_listen: fail_proto_ops: dlm_allow_conn = 0; work_stop(); -fail_local: - deinit_local(); fail: return error; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index c214fe0981bd..55340ac61456 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -76,6 +76,14 @@ static struct inode *__ecryptfs_get_inode(struct inode *lower_inode, if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) return ERR_PTR(-EXDEV); + + /* Reject dealing with casefold directories. */ + if (IS_CASEFOLDED(lower_inode)) { + pr_err_ratelimited("%s: Can't handle casefolded directory.\n", + __func__); + return ERR_PTR(-EREMOTE); + } + if (!igrab(lower_inode)) return ERR_PTR(-ESTALE); inode = iget5_locked(sb, (unsigned long)lower_inode, diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c index 9e4f47808bd5..13bc60698955 100644 --- a/fs/efivarfs/vars.c +++ b/fs/efivarfs/vars.c @@ -372,7 +372,7 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid, int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), void *data, bool duplicates, struct list_head *head) { - unsigned long variable_name_size = 1024; + unsigned long variable_name_size = 512; efi_char16_t *variable_name; efi_status_t status; efi_guid_t vendor_guid; @@ -389,12 +389,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), goto free; /* - * Per EFI spec, the maximum storage allocated for both - * the variable name and variable data is 1024 bytes. + * A small set of old UEFI implementations reject sizes + * above a certain threshold, the lowest seen in the wild + * is 512. */ do { - variable_name_size = 1024; + variable_name_size = 512; status = efivar_get_next_variable(&variable_name_size, variable_name, @@ -431,9 +432,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), break; case EFI_NOT_FOUND: break; + case EFI_BUFFER_TOO_SMALL: + pr_warn("efivars: Variable name size exceeds maximum (%lu > 512)\n", + variable_name_size); + status = EFI_NOT_FOUND; + break; default: - printk(KERN_WARNING "efivars: get_next_variable: status=%lx\n", - status); + pr_warn("efivars: get_next_variable: status=%lx\n", status); status = EFI_NOT_FOUND; break; } diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 26fa170090b8..c4a3187bdb8f 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -21,6 +21,8 @@ struct z_erofs_decompress_req { }; struct z_erofs_decompressor { + int (*config)(struct super_block *sb, struct erofs_super_block *dsb, + void *data, int size); int (*decompress)(struct z_erofs_decompress_req *rq, struct page **pagepool); char *name; @@ -93,6 +95,8 @@ int z_erofs_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool); /* prototypes for specific algorithms */ +int z_erofs_load_lzma_config(struct super_block *sb, + struct erofs_super_block *dsb, void *data, int size); int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool); #endif diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 064dc2dcfc72..59af148d1a98 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -443,4 +443,5 @@ const struct file_operations erofs_file_fops = { .read_iter = erofs_file_read_iter, .mmap = erofs_file_mmap, .splice_read = generic_file_splice_read, + .get_unmapped_area = thp_get_unmapped_area, }; diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 51b7ac7166d9..1eefa4411e06 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -24,11 +24,11 @@ struct z_erofs_lz4_decompress_ctx { unsigned int oend; }; -int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lz4_cfgs *lz4, int size) +static int z_erofs_load_lz4_config(struct super_block *sb, + struct erofs_super_block *dsb, void *data, int size) { struct erofs_sb_info *sbi = EROFS_SB(sb); + struct z_erofs_lz4_cfgs *lz4 = data; u16 distance; if (lz4) { @@ -122,11 +122,11 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, } static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, - void *inpage, unsigned int *inputmargin, int *maptype, - bool may_inplace) + void *inpage, void *out, unsigned int *inputmargin, + int *maptype, bool may_inplace) { struct z_erofs_decompress_req *rq = ctx->rq; - unsigned int omargin, total, i, j; + unsigned int omargin, total, i; struct page **in; void *src, *tmp; @@ -136,20 +136,20 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize)) goto docopy; - for (i = 0; i < ctx->inpages; ++i) { - DBG_BUGON(rq->in[i] == NULL); - for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j) - if (rq->out[j] == rq->in[i]) - goto docopy; - } + for (i = 0; i < ctx->inpages; ++i) + if (rq->out[ctx->outpages - ctx->inpages + i] != + rq->in[i]) + goto docopy; + kunmap_local(inpage); + *maptype = 3; + return out + ((ctx->outpages - ctx->inpages) << PAGE_SHIFT); } if (ctx->inpages <= 1) { *maptype = 0; return inpage; } - kunmap_atomic(inpage); - might_sleep(); + kunmap_local(inpage); src = erofs_vm_map_ram(rq->in, ctx->inpages); if (!src) return ERR_PTR(-ENOMEM); @@ -162,7 +162,7 @@ docopy: src = erofs_get_pcpubuf(ctx->inpages); if (!src) { DBG_BUGON(1); - kunmap_atomic(inpage); + kunmap_local(inpage); return ERR_PTR(-EFAULT); } @@ -173,9 +173,9 @@ docopy: min_t(unsigned int, total, PAGE_SIZE - *inputmargin); if (!inpage) - inpage = kmap_atomic(*in); + inpage = kmap_local_page(*in); memcpy(tmp, inpage + *inputmargin, page_copycnt); - kunmap_atomic(inpage); + kunmap_local(inpage); inpage = NULL; tmp += page_copycnt; total -= page_copycnt; @@ -205,16 +205,16 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, } static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, - u8 *out) + u8 *dst) { struct z_erofs_decompress_req *rq = ctx->rq; bool support_0padding = false, may_inplace = false; unsigned int inputmargin; - u8 *headpage, *src; + u8 *out, *headpage, *src; int ret, maptype; DBG_BUGON(*rq->in == NULL); - headpage = kmap_atomic(*rq->in); + headpage = kmap_local_page(*rq->in); /* LZ4 decompression inplace is only safe if zero_padding is enabled */ if (erofs_sb_has_zero_padding(EROFS_SB(rq->sb))) { @@ -223,7 +223,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, min_t(unsigned int, rq->inputsize, EROFS_BLKSIZ - rq->pageofs_in)); if (ret) { - kunmap_atomic(headpage); + kunmap_local(headpage); return ret; } may_inplace = !((rq->pageofs_in + rq->inputsize) & @@ -231,11 +231,12 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, } inputmargin = rq->pageofs_in; - src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin, + src = z_erofs_lz4_handle_overlap(ctx, headpage, dst, &inputmargin, &maptype, may_inplace); if (IS_ERR(src)) return PTR_ERR(src); + out = dst + rq->pageofs_out; /* legacy format could compress extra data in a pcluster. */ if (rq->partial_decoding || !support_0padding) ret = LZ4_decompress_safe_partial(src + inputmargin, out, @@ -261,12 +262,12 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, } if (maptype == 0) { - kunmap_atomic(headpage); + kunmap_local(headpage); } else if (maptype == 1) { vm_unmap_ram(src, ctx->inpages); } else if (maptype == 2) { erofs_put_pcpubuf(src); - } else { + } else if (maptype != 3) { DBG_BUGON(1); return -EFAULT; } @@ -289,7 +290,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, /* one optimized fast path only for non bigpcluster cases yet */ if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) { DBG_BUGON(!*rq->out); - dst = kmap_atomic(*rq->out); + dst = kmap_local_page(*rq->out); dst_maptype = 0; goto dstmap_out; } @@ -309,9 +310,9 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, } dstmap_out: - ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out); + ret = z_erofs_lz4_decompress_mem(&ctx, dst); if (!dst_maptype) - kunmap_atomic(dst); + kunmap_local(dst); else if (dst_maptype == 2) vm_unmap_ram(dst, ctx.outpages); return ret; @@ -373,17 +374,71 @@ static struct z_erofs_decompressor decompressors[] = { .name = "interlaced" }, [Z_EROFS_COMPRESSION_LZ4] = { + .config = z_erofs_load_lz4_config, .decompress = z_erofs_lz4_decompress, .name = "lz4" }, #ifdef CONFIG_EROFS_FS_ZIP_LZMA [Z_EROFS_COMPRESSION_LZMA] = { + .config = z_erofs_load_lzma_config, .decompress = z_erofs_lzma_decompress, .name = "lzma" }, #endif }; +int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + unsigned int algs, alg; + erofs_off_t offset; + int size, ret = 0; + + if (!erofs_sb_has_compr_cfgs(sbi)) { + sbi->available_compr_algs = 1 << Z_EROFS_COMPRESSION_LZ4; + return z_erofs_load_lz4_config(sb, dsb, NULL, 0); + } + + sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); + if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { + erofs_err(sb, "unidentified algorithms %x, please upgrade kernel", + sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); + return -EOPNOTSUPP; + } + + offset = EROFS_SUPER_OFFSET + sbi->sb_size; + alg = 0; + for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { + void *data; + + if (!(algs & 1)) + continue; + + data = erofs_read_metadata(sb, &buf, &offset, &size); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + break; + } + + if (alg >= ARRAY_SIZE(decompressors) || + !decompressors[alg].config) { + erofs_err(sb, "algorithm %d isn't enabled on this kernel", + alg); + ret = -EOPNOTSUPP; + } else { + ret = decompressors[alg].config(sb, + dsb, data, size); + } + + kfree(data); + if (ret) + break; + } + erofs_put_metabuf(&buf); + return ret; +} + int z_erofs_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool) { diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 49addc345aeb..970464c4b676 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -72,10 +72,10 @@ int z_erofs_lzma_init(void) } int z_erofs_load_lzma_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lzma_cfgs *lzma, int size) + struct erofs_super_block *dsb, void *data, int size) { static DEFINE_MUTEX(lzma_resize_mutex); + struct z_erofs_lzma_cfgs *lzma = data; unsigned int dict_size, i; struct z_erofs_lzma *strm, *head = NULL; int err; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index d8d09fc3ed65..79a7a5815ea6 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -471,6 +471,8 @@ struct erofs_map_dev { /* data.c */ extern const struct file_operations erofs_file_fops; +void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, + erofs_off_t *offset, int *lengthp); void erofs_unmap_metabuf(struct erofs_buf *buf); void erofs_put_metabuf(struct erofs_buf *buf); void *erofs_bread(struct erofs_buf *buf, struct inode *inode, @@ -565,9 +567,7 @@ void z_erofs_exit_zip_subsystem(void); int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, struct erofs_workgroup *egrp); int erofs_try_to_free_cached_page(struct page *page); -int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lz4_cfgs *lz4, int len); +int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb); #else static inline void erofs_shrinker_register(struct super_block *sb) {} static inline void erofs_shrinker_unregister(struct super_block *sb) {} @@ -575,36 +575,14 @@ static inline int erofs_init_shrinker(void) { return 0; } static inline void erofs_exit_shrinker(void) {} static inline int z_erofs_init_zip_subsystem(void) { return 0; } static inline void z_erofs_exit_zip_subsystem(void) {} -static inline int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lz4_cfgs *lz4, int len) -{ - if (lz4 || dsb->u1.lz4_max_distance) { - erofs_err(sb, "lz4 algorithm isn't enabled"); - return -EINVAL; - } - return 0; -} #endif /* !CONFIG_EROFS_FS_ZIP */ #ifdef CONFIG_EROFS_FS_ZIP_LZMA int z_erofs_lzma_init(void); void z_erofs_lzma_exit(void); -int z_erofs_load_lzma_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lzma_cfgs *lzma, int size); #else static inline int z_erofs_lzma_init(void) { return 0; } static inline int z_erofs_lzma_exit(void) { return 0; } -static inline int z_erofs_load_lzma_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lzma_cfgs *lzma, int size) { - if (lzma) { - erofs_err(sb, "lzma algorithm isn't enabled"); - return -EINVAL; - } - return 0; -} #endif /* !CONFIG_EROFS_FS_ZIP */ /* flags for erofs_fscache_register_cookie() */ diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index 0dc34721080c..e8ccaa761bd6 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -137,24 +137,24 @@ static void *find_target_block_classic(struct erofs_buf *target, /* string comparison without already matched prefix */ diff = erofs_dirnamecmp(name, &dname, &matched); - if (!diff) { - *_ndirents = 0; - goto out; - } else if (diff > 0) { - head = mid + 1; - startprfx = matched; - - if (!IS_ERR(candidate)) - erofs_put_metabuf(target); - *target = buf; - candidate = de; - *_ndirents = ndirents; - } else { + if (diff < 0) { erofs_put_metabuf(&buf); - back = mid - 1; endprfx = matched; + continue; } + + if (!IS_ERR(candidate)) + erofs_put_metabuf(target); + *target = buf; + if (!diff) { + *_ndirents = 0; + return de; + } + head = mid + 1; + startprfx = matched; + candidate = de; + *_ndirents = ndirents; continue; } out: /* free if the candidate is valid */ diff --git a/fs/erofs/super.c b/fs/erofs/super.c index bd8bf8fc2f5d..f2647126cb2f 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -126,8 +126,8 @@ static bool check_layout_compatibility(struct super_block *sb, #ifdef CONFIG_EROFS_FS_ZIP /* read variable-sized metadata, offset will be aligned by 4-byte */ -static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, - erofs_off_t *offset, int *lengthp) +void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, + erofs_off_t *offset, int *lengthp) { u8 *buffer, *ptr; int len, i, cnt; @@ -159,64 +159,15 @@ static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, } return buffer; } - -static int erofs_load_compr_cfgs(struct super_block *sb, - struct erofs_super_block *dsb) -{ - struct erofs_sb_info *sbi = EROFS_SB(sb); - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; - unsigned int algs, alg; - erofs_off_t offset; - int size, ret = 0; - - sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); - if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { - erofs_err(sb, "try to load compressed fs with unsupported algorithms %x", - sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); - return -EINVAL; - } - - offset = EROFS_SUPER_OFFSET + sbi->sb_size; - alg = 0; - for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { - void *data; - - if (!(algs & 1)) - continue; - - data = erofs_read_metadata(sb, &buf, &offset, &size); - if (IS_ERR(data)) { - ret = PTR_ERR(data); - break; - } - - switch (alg) { - case Z_EROFS_COMPRESSION_LZ4: - ret = z_erofs_load_lz4_config(sb, dsb, data, size); - break; - case Z_EROFS_COMPRESSION_LZMA: - ret = z_erofs_load_lzma_config(sb, dsb, data, size); - break; - default: - DBG_BUGON(1); - ret = -EFAULT; - } - kfree(data); - if (ret) - break; - } - erofs_put_metabuf(&buf); - return ret; -} #else -static int erofs_load_compr_cfgs(struct super_block *sb, - struct erofs_super_block *dsb) +static int z_erofs_parse_cfgs(struct super_block *sb, + struct erofs_super_block *dsb) { - if (dsb->u1.available_compr_algs) { - erofs_err(sb, "try to load compressed fs when compression is disabled"); - return -EINVAL; - } - return 0; + if (!dsb->u1.available_compr_algs) + return 0; + + erofs_err(sb, "compression disabled, unable to mount compressed EROFS"); + return -EOPNOTSUPP; } #endif @@ -398,10 +349,7 @@ static int erofs_read_superblock(struct super_block *sb) } /* parse on-disk compression configurations */ - if (erofs_sb_has_compr_cfgs(sbi)) - ret = erofs_load_compr_cfgs(sb, dsb); - else - ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0); + ret = z_erofs_parse_cfgs(sb, dsb); if (ret < 0) goto out; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index cf9a2fa7f55d..47e71964eeff 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -652,7 +652,6 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe) if (ztailpacking) { pcl->obj.index = 0; /* which indicates ztailpacking */ - pcl->pageofs_in = erofs_blkoff(map->m_pa); pcl->tailpacking_size = map->m_plen; } else { pcl->obj.index = map->m_pa >> PAGE_SHIFT; @@ -852,6 +851,7 @@ repeat: get_page(fe->map.buf.page); WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, fe->map.buf.page); + fe->pcl->pageofs_in = map->m_pa & ~PAGE_MASK; fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; } else { /* bind cache first when cached decompression is preferred */ diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 0337b70b2dac..abcded1acd19 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -610,7 +610,7 @@ static int z_erofs_do_map_blocks(struct inode *inode, .map = map, }; int err = 0; - unsigned int lclusterbits, endoff; + unsigned int lclusterbits, endoff, afmt; unsigned long initial_lcn; unsigned long long ofs, end; @@ -700,17 +700,20 @@ static int z_erofs_do_map_blocks(struct inode *inode, err = -EFSCORRUPTED; goto unmap_out; } - if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER) - map->m_algorithmformat = - Z_EROFS_COMPRESSION_INTERLACED; - else - map->m_algorithmformat = - Z_EROFS_COMPRESSION_SHIFTED; - } else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) { - map->m_algorithmformat = vi->z_algorithmtype[1]; + afmt = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER ? + Z_EROFS_COMPRESSION_INTERLACED : + Z_EROFS_COMPRESSION_SHIFTED; } else { - map->m_algorithmformat = vi->z_algorithmtype[0]; + afmt = m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2 ? + vi->z_algorithmtype[1] : vi->z_algorithmtype[0]; + if (!(EROFS_I_SB(inode)->available_compr_algs & (1 << afmt))) { + erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu", + afmt, vi->nid); + err = -EFSCORRUPTED; + goto unmap_out; + } } + map->m_algorithmformat = afmt; if ((flags & EROFS_GET_BLOCKS_FIEMAP) || ((flags & EROFS_GET_BLOCKS_READMORE) && diff --git a/fs/exec.c b/fs/exec.c index 283012eb1aeb..b01434d6a512 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -896,6 +896,7 @@ int transfer_args_to_stack(struct linux_binprm *bprm, goto out; } + bprm->exec += *sp_location - MAX_ARG_PAGES * PAGE_SIZE; *sp_location = sp; out: @@ -1410,6 +1411,9 @@ int begin_new_exec(struct linux_binprm * bprm) out_unlock: up_write(&me->signal->exec_update_lock); + if (!bprm->cred) + mutex_unlock(&me->signal->cred_guard_mutex); + out: return retval; } diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index c648a493faf2..3204bd33e4e8 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -18,7 +18,7 @@ #include #include -#define dprintk(fmt, args...) do{}while(0) +#define dprintk(fmt, args...) pr_debug(fmt, ##args) static int get_name(const struct path *path, char *name, struct dentry *child); @@ -132,8 +132,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, inode_unlock(dentry->d_inode); if (IS_ERR(parent)) { - dprintk("%s: get_parent of %ld failed, err %d\n", - __func__, dentry->d_inode->i_ino, PTR_ERR(parent)); + dprintk("get_parent of %lu failed, err %ld\n", + dentry->d_inode->i_ino, PTR_ERR(parent)); return parent; } @@ -147,7 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, dprintk("%s: found name: %s\n", __func__, nbuf); tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf)); if (IS_ERR(tmp)) { - dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); + dprintk("lookup failed: %ld\n", PTR_ERR(tmp)); err = PTR_ERR(tmp); goto out_err; } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index aa5aadd70bbc..67af684e44e6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2229,7 +2229,7 @@ static int ext4_fill_es_cache_info(struct inode *inode, /* - * ext4_ext_determine_hole - determine hole around given block + * ext4_ext_find_hole - find hole around given block according to the given path * @inode: inode we lookup in * @path: path in extent tree to @lblk * @lblk: pointer to logical block around which we want to determine hole @@ -2241,9 +2241,9 @@ static int ext4_fill_es_cache_info(struct inode *inode, * The function returns the length of a hole starting at @lblk. We update @lblk * to the beginning of the hole if we managed to find it. */ -static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t *lblk) +static ext4_lblk_t ext4_ext_find_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t *lblk) { int depth = ext_depth(inode); struct ext4_extent *ex; @@ -2270,30 +2270,6 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, return len; } -/* - * ext4_ext_put_gap_in_cache: - * calculate boundaries of the gap that the requested block fits into - * and cache this gap - */ -static void -ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start, - ext4_lblk_t hole_len) -{ - struct extent_status es; - - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, - hole_start + hole_len - 1, &es); - if (es.es_len) { - /* There's delayed extent containing lblock? */ - if (es.es_lblk <= hole_start) - return; - hole_len = min(es.es_lblk - hole_start, hole_len); - } - ext_debug(inode, " -> %u:%u\n", hole_start, hole_len); - ext4_es_insert_extent(inode, hole_start, hole_len, ~0, - EXTENT_STATUS_HOLE); -} - /* * ext4_ext_rm_idx: * removes index from the index block. @@ -4064,6 +4040,69 @@ static int get_implied_cluster_alloc(struct super_block *sb, return 0; } +/* + * Determine hole length around the given logical block, first try to + * locate and expand the hole from the given @path, and then adjust it + * if it's partially or completely converted to delayed extents, insert + * it into the extent cache tree if it's indeed a hole, finally return + * the length of the determined extent. + */ +static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t lblk) +{ + ext4_lblk_t hole_start, len; + struct extent_status es; + + hole_start = lblk; + len = ext4_ext_find_hole(inode, path, &hole_start); +again: + ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, + hole_start + len - 1, &es); + if (!es.es_len) + goto insert_hole; + + /* + * There's a delalloc extent in the hole, handle it if the delalloc + * extent is in front of, behind and straddle the queried range. + */ + if (lblk >= es.es_lblk + es.es_len) { + /* + * The delalloc extent is in front of the queried range, + * find again from the queried start block. + */ + len -= lblk - hole_start; + hole_start = lblk; + goto again; + } else if (in_range(lblk, es.es_lblk, es.es_len)) { + /* + * The delalloc extent containing lblk, it must have been + * added after ext4_map_blocks() checked the extent status + * tree, adjust the length to the delalloc extent's after + * lblk. + */ + len = es.es_lblk + es.es_len - lblk; + return len; + } else { + /* + * The delalloc extent is partially or completely behind + * the queried range, update hole length until the + * beginning of the delalloc extent. + */ + len = min(es.es_lblk - hole_start, len); + } + +insert_hole: + /* Put just found gap into cache to speed up subsequent requests */ + ext_debug(inode, " -> %u:%u\n", hole_start, len); + ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE); + + /* Update hole_len to reflect hole size after lblk */ + if (hole_start != lblk) + len -= lblk - hole_start; + + return len; +} /* * Block allocation/map/preallocation routine for extents based files @@ -4181,22 +4220,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * we couldn't try to create block if create flag is zero */ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { - ext4_lblk_t hole_start, hole_len; + ext4_lblk_t len; - hole_start = map->m_lblk; - hole_len = ext4_ext_determine_hole(inode, path, &hole_start); - /* - * put just found gap into cache to speed up - * subsequent requests - */ - ext4_ext_put_gap_in_cache(inode, hole_start, hole_len); + len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk); - /* Update hole_len to reflect hole size after map->m_lblk */ - if (hole_start != map->m_lblk) - hole_len -= map->m_lblk - hole_start; map->m_pblk = 0; - map->m_len = min_t(unsigned int, map->m_len, hole_len); - + map->m_len = min_t(unsigned int, map->m_len, len); goto out; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a6e41746890d..bc0ca45a5d81 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -831,7 +831,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) struct ext4_sb_info *sbi = EXT4_SB(sb); int new_order; - if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0) + if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0) return; new_order = mb_avg_fragment_size_order(sb, @@ -1118,6 +1118,24 @@ void ext4_mb_generate_buddy(struct super_block *sb, atomic64_add(period, &sbi->s_mb_generation_time); } +static void mb_regenerate_buddy(struct ext4_buddy *e4b) +{ + int count; + int order = 1; + void *buddy; + + while ((buddy = mb_find_buddy(e4b, order++, &count))) + mb_set_bits(buddy, 0, count); + + e4b->bd_info->bb_fragments = 0; + memset(e4b->bd_info->bb_counters, 0, + sizeof(*e4b->bd_info->bb_counters) * + (e4b->bd_sb->s_blocksize_bits + 2)); + + ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, + e4b->bd_bitmap, e4b->bd_group, e4b->bd_info); +} + /* The buddy information is attached the buddy cache inode * for convenience. The information regarding each group * is loaded via ext4_mb_load_buddy. The information involve @@ -1767,11 +1785,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, mb_check_buddy(e4b); mb_free_blocks_double(inode, e4b, first, count); - this_cpu_inc(discard_pa_seq); - e4b->bd_info->bb_free += count; - if (first < e4b->bd_info->bb_first_free) - e4b->bd_info->bb_first_free = first; - /* access memory sequentially: check left neighbour, * clear range and then check right neighbour */ @@ -1785,21 +1798,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t blocknr; + /* + * Fastcommit replay can free already freed blocks which + * corrupts allocation info. Regenerate it. + */ + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + mb_regenerate_buddy(e4b); + goto check; + } + blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(sbi, block); - if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { - ext4_grp_locked_error(sb, e4b->bd_group, - inode ? inode->i_ino : 0, - blocknr, - "freeing already freed block (bit %u); block bitmap corrupt.", - block); - ext4_mark_group_bitmap_corrupted( - sb, e4b->bd_group, + ext4_grp_locked_error(sb, e4b->bd_group, + inode ? inode->i_ino : 0, blocknr, + "freeing already freed block (bit %u); block bitmap corrupt.", + block); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); - } - goto done; + return; } + this_cpu_inc(discard_pa_seq); + e4b->bd_info->bb_free += count; + if (first < e4b->bd_info->bb_first_free) + e4b->bd_info->bb_first_free = first; + /* let's maintain fragments counter */ if (left_is_free && right_is_free) e4b->bd_info->bb_fragments--; @@ -1824,9 +1847,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, if (first <= last) mb_buddy_mark_free(e4b, first >> 1, last >> 1); -done: mb_set_largest_free_order(sb, e4b->bd_info); mb_update_avg_fragment_size(sb, e4b->bd_info); +check: mb_check_buddy(e4b); } @@ -2153,6 +2176,9 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, return err; ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); if (max > 0) { @@ -2160,6 +2186,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); @@ -2188,12 +2215,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, if (err) return err; - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) { - ext4_mb_unload_buddy(e4b); - return 0; - } - ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, ac->ac_g_ex.fe_len, &ex); ex.fe_logical = 0xDEADFA11; /* debug value */ @@ -2226,6 +2251,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); @@ -4658,10 +4684,16 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) .fe_len = ac->ac_g_ex.fe_len, }; loff_t orig_goal_end = extent_logical_end(sbi, &ex); + loff_t o_ex_end = extent_logical_end(sbi, &ac->ac_o_ex); - /* we can't allocate as much as normalizer wants. - * so, found space must get proper lstart - * to cover original request */ + /* + * We can't allocate as much as normalizer wants, so we try + * to get proper lstart to cover the original request, except + * when the goal doesn't cover the original request as below: + * + * orig_ex:2045/2055(10), isize:8417280 -> normalized:0/2048 + * best_ex:0/200(200) -> adjusted: 1848/2048(200) + */ BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical); BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len); @@ -4673,7 +4705,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) * 1. Check if best ex can be kept at end of goal and still * cover original start * 2. Else, check if best ex can be kept at start of goal and - * still cover original start + * still cover original end * 3. Else, keep the best ex at start of original request. */ ex.fe_len = ac->ac_b_ex.fe_len; @@ -4683,7 +4715,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) goto adjust_bex; ex.fe_logical = ac->ac_g_ex.fe_logical; - if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex)) + if (o_ex_end <= extent_logical_end(sbi, &ex)) goto adjust_bex; ex.fe_logical = ac->ac_o_ex.fe_logical; @@ -4691,7 +4723,6 @@ adjust_bex: ac->ac_b_ex.fe_logical = ex.fe_logical; BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); - BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end); } @@ -6421,11 +6452,16 @@ __acquires(bitlock) static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, ext4_group_t grp) { - if (grp < ext4_get_groups_count(sb)) - return EXT4_CLUSTERS_PER_GROUP(sb) - 1; - return (ext4_blocks_count(EXT4_SB(sb)->s_es) - - ext4_group_first_block_no(sb, grp) - 1) >> - EXT4_CLUSTER_BITS(sb); + unsigned long nr_clusters_in_group; + + if (grp < (ext4_get_groups_count(sb) - 1)) + nr_clusters_in_group = EXT4_CLUSTERS_PER_GROUP(sb); + else + nr_clusters_in_group = (ext4_blocks_count(EXT4_SB(sb)->s_es) - + ext4_group_first_block_no(sb, grp)) + >> EXT4_CLUSTER_BITS(sb); + + return nr_clusters_in_group - 1; } static bool ext4_trim_interrupted(void) @@ -6439,13 +6475,15 @@ static int ext4_try_to_trim_range(struct super_block *sb, __acquires(ext4_group_lock_ptr(sb, e4b->bd_group)) __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) { - ext4_grpblk_t next, count, free_count; + ext4_grpblk_t next, count, free_count, last, origin_start; bool set_trimmed = false; void *bitmap; + last = ext4_last_grp_cluster(sb, e4b->bd_group); bitmap = e4b->bd_bitmap; - if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group)) + if (start == 0 && max >= last) set_trimmed = true; + origin_start = start; start = max(e4b->bd_info->bb_first_free, start); count = 0; free_count = 0; @@ -6454,7 +6492,10 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) start = mb_find_next_zero_bit(bitmap, max + 1, start); if (start > max) break; - next = mb_find_next_bit(bitmap, max + 1, start); + + next = mb_find_next_bit(bitmap, last + 1, start); + if (origin_start == 0 && next >= last) + set_trimmed = true; if ((next - start) >= minblocks) { int ret = ext4_trim_extent(sb, start, next - start, e4b); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index dedc9d445f24..8e3ff150bc36 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -621,6 +621,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, goto out; o_end = o_start + len; + *moved_len = 0; while (o_start < o_end) { struct ext4_extent *ex; ext4_lblk_t cur_blk, next_blk; @@ -675,7 +676,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, */ ext4_double_up_write_data_sem(orig_inode, donor_inode); /* Swap original branches with new branches */ - move_extent_per_page(o_filp, donor_inode, + *moved_len += move_extent_per_page(o_filp, donor_inode, orig_page_index, donor_page_index, offset_in_page, cur_len, unwritten, &ret); @@ -685,9 +686,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, o_start += cur_len; d_start += cur_len; } - *moved_len = o_start - orig_blk; - if (*moved_len > len) - *moved_len = len; out: if (*moved_len) { diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9833ab6db117..38ce42396758 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -231,17 +231,24 @@ struct ext4_new_flex_group_data { in the flex group */ __u16 *bg_flags; /* block group flags of groups in @groups */ + ext4_group_t resize_bg; /* number of allocated + new_group_data */ ext4_group_t count; /* number of groups in @groups */ }; +/* + * Avoiding memory allocation failures due to too many groups added each time. + */ +#define MAX_RESIZE_BG 16384 + /* * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of * @flexbg_size. * * Returns NULL on failure otherwise address of the allocated structure. */ -static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) +static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size) { struct ext4_new_flex_group_data *flex_gd; @@ -249,17 +256,18 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) if (flex_gd == NULL) goto out3; - if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data)) - goto out2; - flex_gd->count = flexbg_size; + if (unlikely(flexbg_size > MAX_RESIZE_BG)) + flex_gd->resize_bg = MAX_RESIZE_BG; + else + flex_gd->resize_bg = flexbg_size; - flex_gd->groups = kmalloc_array(flexbg_size, + flex_gd->groups = kmalloc_array(flex_gd->resize_bg, sizeof(struct ext4_new_group_data), GFP_NOFS); if (flex_gd->groups == NULL) goto out2; - flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16), + flex_gd->bg_flags = kmalloc_array(flex_gd->resize_bg, sizeof(__u16), GFP_NOFS); if (flex_gd->bg_flags == NULL) goto out1; @@ -296,7 +304,7 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) */ static int ext4_alloc_group_tables(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, - int flexbg_size) + unsigned int flexbg_size) { struct ext4_new_group_data *group_data = flex_gd->groups; ext4_fsblk_t start_blk; @@ -397,12 +405,12 @@ next_group: group = group_data[0].group; printk(KERN_DEBUG "EXT4-fs: adding a flex group with " - "%d groups, flexbg size is %d:\n", flex_gd->count, + "%u groups, flexbg size is %u:\n", flex_gd->count, flexbg_size); for (i = 0; i < flex_gd->count; i++) { ext4_debug( - "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n", + "adding %s group %u: %u blocks (%u free, %u mdata blocks)\n", ext4_bg_has_super(sb, group + i) ? "normal" : "no-super", group + i, group_data[i].blocks_count, @@ -1598,7 +1606,8 @@ exit_journal: int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int gdb_num_end = ((group + flex_gd->count - 1) / EXT4_DESC_PER_BLOCK(sb)); - int meta_bg = ext4_has_feature_meta_bg(sb); + int meta_bg = ext4_has_feature_meta_bg(sb) && + gdb_num >= le32_to_cpu(es->s_first_meta_bg); sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr - ext4_group_first_block_no(sb, 0); sector_t old_gdb = 0; @@ -1623,8 +1632,7 @@ exit: static int ext4_setup_next_flex_gd(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, - ext4_fsblk_t n_blocks_count, - unsigned long flexbg_size) + ext4_fsblk_t n_blocks_count) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; @@ -1648,7 +1656,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, BUG_ON(last); ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last); - last_group = group | (flexbg_size - 1); + last_group = group | (flex_gd->resize_bg - 1); if (last_group > n_group) last_group = n_group; @@ -2010,8 +2018,9 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_fsblk_t o_blocks_count; ext4_fsblk_t n_blocks_count_retry = 0; unsigned long last_update_time = 0; - int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; + int err = 0; int meta_bg; + unsigned int flexbg_size = ext4_flex_bg_size(sbi); /* See if the device is actually as big as what was requested */ bh = ext4_sb_bread(sb, n_blocks_count - 1, 0); @@ -2152,8 +2161,7 @@ retry: /* Add flex groups. Note that a regular group is a * flex group with 1 group. */ - while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, - flexbg_size)) { + while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count)) { if (time_is_before_jiffies(last_update_time + HZ * 10)) { if (last_update_time) ext4_msg(sb, KERN_INFO, diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index eb4d69f53337..3ec203bbd559 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -70,7 +70,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, - .is_por = !is_meta, + .is_por = !is_meta ? 1 : 0, }; int err; @@ -234,8 +234,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .op = REQ_OP_READ, .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, - .in_list = false, - .is_por = (type == META_POR), + .in_list = 0, + .is_por = (type == META_POR) ? 1 : 0, }; struct blk_plug plug; int err; diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 4cb58e8d699e..df6dfd7de6d0 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -55,6 +55,7 @@ struct f2fs_compress_ops { int (*init_decompress_ctx)(struct decompress_io_ctx *dic); void (*destroy_decompress_ctx)(struct decompress_io_ctx *dic); int (*decompress_pages)(struct decompress_io_ctx *dic); + bool (*is_level_valid)(int level); }; static unsigned int offset_in_cluster(struct compress_ctx *cc, pgoff_t index) @@ -322,11 +323,21 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) return 0; } +static bool lz4_is_level_valid(int lvl) +{ +#ifdef CONFIG_F2FS_FS_LZ4HC + return !lvl || (lvl >= LZ4HC_MIN_CLEVEL && lvl <= LZ4HC_MAX_CLEVEL); +#else + return lvl == 0; +#endif +} + static const struct f2fs_compress_ops f2fs_lz4_ops = { .init_compress_ctx = lz4_init_compress_ctx, .destroy_compress_ctx = lz4_destroy_compress_ctx, .compress_pages = lz4_compress_pages, .decompress_pages = lz4_decompress_pages, + .is_level_valid = lz4_is_level_valid, }; #endif @@ -490,6 +501,11 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic) return 0; } +static bool zstd_is_level_valid(int lvl) +{ + return lvl >= zstd_min_clevel() && lvl <= zstd_max_clevel(); +} + static const struct f2fs_compress_ops f2fs_zstd_ops = { .init_compress_ctx = zstd_init_compress_ctx, .destroy_compress_ctx = zstd_destroy_compress_ctx, @@ -497,6 +513,7 @@ static const struct f2fs_compress_ops f2fs_zstd_ops = { .init_decompress_ctx = zstd_init_decompress_ctx, .destroy_decompress_ctx = zstd_destroy_decompress_ctx, .decompress_pages = zstd_decompress_pages, + .is_level_valid = zstd_is_level_valid, }; #endif @@ -555,6 +572,16 @@ bool f2fs_is_compress_backend_ready(struct inode *inode) return f2fs_cops[F2FS_I(inode)->i_compress_algorithm]; } +bool f2fs_is_compress_level_valid(int alg, int lvl) +{ + const struct f2fs_compress_ops *cops = f2fs_cops[alg]; + + if (cops->is_level_valid) + return cops->is_level_valid(lvl); + + return lvl == 0; +} + static mempool_t *compress_page_pool; static int num_compress_pages = 512; module_param(num_compress_pages, uint, 0444); @@ -1026,8 +1053,10 @@ static void set_cluster_dirty(struct compress_ctx *cc) int i; for (i = 0; i < cc->cluster_size; i++) - if (cc->rpages[i]) + if (cc->rpages[i]) { set_page_dirty(cc->rpages[i]); + set_page_private_gcing(cc->rpages[i]); + } } static int prepare_compress_overwrite(struct compress_ctx *cc, @@ -1220,10 +1249,11 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, .page = NULL, .encrypted_page = NULL, .compressed_page = NULL, - .submitted = false, + .submitted = 0, .io_type = io_type, .io_wbc = wbc, - .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode), + .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode) ? + 1 : 0, }; struct dnode_of_data dn; struct node_info ni; @@ -1358,8 +1388,6 @@ unlock_continue: add_compr_block_stat(inode, cc->valid_nr_cpages); set_inode_flag(cc->inode, FI_APPEND_WRITE); - if (cc->cluster_idx == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); f2fs_put_dnode(&dn); if (quota_inode) @@ -1407,6 +1435,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) struct f2fs_sb_info *sbi = bio->bi_private; struct compress_io_ctx *cic = (struct compress_io_ctx *)page_private(page); + enum count_type type = WB_DATA_TYPE(page, + f2fs_is_compressed_page(page)); int i; if (unlikely(bio->bi_status)) @@ -1414,7 +1444,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) f2fs_compress_free_page(page); - dec_page_count(sbi, F2FS_WB_DATA); + dec_page_count(sbi, type); if (atomic_dec_return(&cic->pending_pages)) return; @@ -1430,12 +1460,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) } static int f2fs_write_raw_pages(struct compress_ctx *cc, - int *submitted, + int *submitted_p, struct writeback_control *wbc, enum iostat_type io_type) { struct address_space *mapping = cc->inode->i_mapping; - int _submitted, compr_blocks, ret, i; + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + int submitted, compr_blocks, i; + int ret = 0; compr_blocks = f2fs_compressed_blocks(cc); @@ -1450,6 +1482,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, if (compr_blocks < 0) return compr_blocks; + /* overwrite compressed cluster w/ normal cluster */ + if (compr_blocks > 0) + f2fs_lock_op(sbi); + for (i = 0; i < cc->cluster_size; i++) { if (!cc->rpages[i]) continue; @@ -1474,7 +1510,7 @@ continue_unlock: if (!clear_page_dirty_for_io(cc->rpages[i])) goto continue_unlock; - ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, + ret = f2fs_write_single_data_page(cc->rpages[i], &submitted, NULL, NULL, wbc, io_type, compr_blocks, false); if (ret) { @@ -1482,26 +1518,29 @@ continue_unlock: unlock_page(cc->rpages[i]); ret = 0; } else if (ret == -EAGAIN) { + ret = 0; /* * for quota file, just redirty left pages to * avoid deadlock caused by cluster update race * from foreground operation. */ if (IS_NOQUOTA(cc->inode)) - return 0; - ret = 0; + goto out; f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); goto retry_write; } - return ret; + goto out; } - *submitted += _submitted; + *submitted_p += submitted; } - f2fs_balance_fs(F2FS_M_SB(mapping), true); +out: + if (compr_blocks > 0) + f2fs_unlock_op(sbi); - return 0; + f2fs_balance_fs(sbi, true); + return ret; } int f2fs_write_multi_pages(struct compress_ctx *cc, @@ -1804,16 +1843,18 @@ void f2fs_put_page_dic(struct page *page, bool in_task) * check whether cluster blocks are contiguous, and add extent cache entry * only if cluster blocks are logically and physically contiguous. */ -unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, + unsigned int ofs_in_node) { - bool compressed = f2fs_data_blkaddr(dn) == COMPRESS_ADDR; + bool compressed = data_blkaddr(dn->inode, dn->node_page, + ofs_in_node) == COMPRESS_ADDR; int i = compressed ? 1 : 0; block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + i); + ofs_in_node + i); for (i += 1; i < F2FS_I(dn->inode)->i_cluster_size; i++) { block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + i); + ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) break; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8b561af37974..b83b8ac29f43 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -50,7 +50,7 @@ void f2fs_destroy_bioset(void) bioset_exit(&f2fs_bioset); } -static bool __is_cp_guaranteed(struct page *page) +bool f2fs_is_cp_guaranteed(struct page *page) { struct address_space *mapping = page->mapping; struct inode *inode; @@ -67,8 +67,6 @@ static bool __is_cp_guaranteed(struct page *page) S_ISDIR(inode->i_mode)) return true; - if (f2fs_is_compressed_page(page)) - return false; if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) || page_private_gcing(page)) return true; @@ -327,7 +325,7 @@ static void f2fs_write_end_io(struct bio *bio) bio_for_each_segment_all(bvec, bio, iter_all) { struct page *page = bvec->bv_page; - enum count_type type = WB_DATA_TYPE(page); + enum count_type type = WB_DATA_TYPE(page, false); if (page_private_dummy(page)) { clear_page_private_dummy(page); @@ -733,7 +731,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? - __read_io_type(page) : WB_DATA_TYPE(fio->page)); + __read_io_type(page) : WB_DATA_TYPE(fio->page, false)); __submit_bio(fio->sbi, bio, fio->type); return 0; @@ -941,7 +939,7 @@ alloc_new: if (fio->io_wbc) wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); - inc_page_count(fio->sbi, WB_DATA_TYPE(page)); + inc_page_count(fio->sbi, WB_DATA_TYPE(page, false)); *fio->last_block = fio->new_blkaddr; *fio->bio = bio; @@ -955,6 +953,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; struct page *bio_page; + enum count_type type; f2fs_bug_on(sbi, is_read_io(fio->op)); @@ -982,9 +981,10 @@ next: bio_page = fio->page; /* set submitted = true as a return value */ - fio->submitted = true; + fio->submitted = 1; - inc_page_count(sbi, WB_DATA_TYPE(bio_page)); + type = WB_DATA_TYPE(bio_page, fio->compressed_page); + inc_page_count(sbi, type); if (io->bio && (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, @@ -997,8 +997,9 @@ alloc_new: if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { - dec_page_count(sbi, WB_DATA_TYPE(bio_page)); - fio->retry = true; + dec_page_count(sbi, WB_DATA_TYPE(bio_page, + fio->compressed_page)); + fio->retry = 1; goto skip; } io->bio = __bio_alloc(fio, BIO_MAX_VECS); @@ -1102,18 +1103,12 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, return 0; } -static void __set_data_blkaddr(struct dnode_of_data *dn) +static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { - struct f2fs_node *rn = F2FS_NODE(dn->node_page); - __le32 *addr_array; - int base = 0; + __le32 *addr = get_dnode_addr(dn->inode, dn->node_page); - if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) - base = get_extra_isize(dn->inode); - - /* Get physical address of data block */ - addr_array = blkaddr_in_node(rn); - addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); + dn->data_blkaddr = blkaddr; + addr[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); } /* @@ -1122,18 +1117,17 @@ static void __set_data_blkaddr(struct dnode_of_data *dn) * ->node_page * update block addresses in the node page */ -void f2fs_set_data_blkaddr(struct dnode_of_data *dn) +void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); - __set_data_blkaddr(dn); + __set_data_blkaddr(dn, blkaddr); if (set_page_dirty(dn->node_page)) dn->node_changed = true; } void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { - dn->data_blkaddr = blkaddr; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, blkaddr); f2fs_update_read_extent_cache(dn); } @@ -1148,7 +1142,8 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; - if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) + err = inc_valid_block_count(sbi, dn->inode, &count, true); + if (unlikely(err)) return err; trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, @@ -1160,8 +1155,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) block_t blkaddr = f2fs_data_blkaddr(dn); if (blkaddr == NULL_ADDR) { - dn->data_blkaddr = NEW_ADDR; - __set_data_blkaddr(dn); + __set_data_blkaddr(dn, NEW_ADDR); count--; } } @@ -1419,13 +1413,12 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) return err; dn->data_blkaddr = f2fs_data_blkaddr(dn); - if (dn->data_blkaddr != NULL_ADDR) - goto alloc; + if (dn->data_blkaddr == NULL_ADDR) { + err = inc_valid_block_count(sbi, dn->inode, &count, true); + if (unlikely(err)) + return err; + } - if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) - return err; - -alloc: set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); old_blkaddr = dn->data_blkaddr; f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, @@ -2739,8 +2732,6 @@ got_it: f2fs_outplace_write_data(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); set_inode_flag(inode, FI_APPEND_WRITE); - if (page->index == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); out_writepage: f2fs_put_dnode(&dn); out: @@ -2776,10 +2767,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, - .submitted = false, + .submitted = 0, .compr_blocks = compr_blocks, - .need_lock = LOCK_RETRY, - .post_read = f2fs_post_read_required(inode), + .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY, + .post_read = f2fs_post_read_required(inode) ? 1 : 0, .io_type = io_type, .io_wbc = wbc, .bio = bio, @@ -2819,9 +2810,6 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, zero_user_segment(page, offset, PAGE_SIZE); write: - if (f2fs_is_drop_cache(inode)) - goto out; - /* Dentry/quota blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode) || quota_inode) { /* @@ -2858,6 +2846,7 @@ write: if (err == -EAGAIN) { err = f2fs_do_write_data_page(&fio); if (err == -EAGAIN) { + f2fs_bug_on(sbi, compr_blocks); fio.need_lock = LOCK_REQ; err = f2fs_do_write_data_page(&fio); } @@ -2902,7 +2891,7 @@ out: } if (submitted) - *submitted = fio.submitted ? 1 : 0; + *submitted = fio.submitted; return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 93b9da97f1ae..39978f244121 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -74,6 +74,11 @@ struct f2fs_fault_info { extern const char *f2fs_fault_name[FAULT_MAX]; #define IS_FAULT_SET(fi, type) ((fi)->inject_type & BIT(type)) + +/* maximum retry count for injected failure */ +#define DEFAULT_FAILURE_RETRY_COUNT 8 +#else +#define DEFAULT_FAILURE_RETRY_COUNT 1 #endif #define MIN_ROOT_RESERVED_BLOCKS (128 * 1024 * 1024) @@ -766,8 +771,6 @@ enum { FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ - FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ - FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_SKIP_WRITES, /* should skip data page writeback */ @@ -1069,7 +1072,8 @@ struct f2fs_sm_info { * f2fs monitors the number of several block types such as on-writeback, * dirty dentry blocks, dirty node blocks, and dirty meta blocks. */ -#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) +#define WB_DATA_TYPE(p, f) \ + (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, @@ -1185,19 +1189,19 @@ struct f2fs_io_info { struct page *encrypted_page; /* encrypted page */ struct page *compressed_page; /* compressed page */ struct list_head list; /* serialize IOs */ - bool submitted; /* indicate IO submission */ - int need_lock; /* indicate we need to lock cp_rwsem */ - bool in_list; /* indicate fio is in io_list */ - bool is_por; /* indicate IO is from recovery or not */ - bool retry; /* need to reallocate block address */ - int compr_blocks; /* # of compressed block addresses */ - bool encrypted; /* indicate file is encrypted */ - bool post_read; /* require post read */ + unsigned int compr_blocks; /* # of compressed block addresses */ + unsigned int need_lock:8; /* indicate we need to lock cp_rwsem */ + unsigned int version:8; /* version of the node */ + unsigned int submitted:1; /* indicate IO submission */ + unsigned int in_list:1; /* indicate fio is in io_list */ + unsigned int is_por:1; /* indicate IO is from recovery or not */ + unsigned int retry:1; /* need to reallocate block address */ + unsigned int encrypted:1; /* indicate file is encrypted */ + unsigned int post_read:1; /* require post read */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ struct bio **bio; /* bio for ipu */ sector_t *last_block; /* last block number in bio */ - unsigned char version; /* version of the node */ }; struct bio_entry { @@ -2291,7 +2295,7 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, - struct inode *inode, blkcnt_t *count) + struct inode *inode, blkcnt_t *count, bool partial) { blkcnt_t diff = 0, release = 0; block_t avail_user_block_count; @@ -2332,6 +2336,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, avail_user_block_count = 0; } if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { + if (!partial) { + spin_unlock(&sbi->stat_lock); + goto enospc; + } + diff = sbi->total_valid_block_count - avail_user_block_count; if (diff > *count) diff = *count; @@ -2995,6 +3004,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_INLINE_DOTS: case FI_PIN_FILE: case FI_COMPRESS_RELEASED: + case FI_ATOMIC_COMMITTED: f2fs_mark_inode_dirty_sync(inode, true); } } @@ -3251,22 +3261,13 @@ static inline bool f2fs_is_cow_file(struct inode *inode) return is_inode_flag_set(inode, FI_COW_FILE); } -static inline bool f2fs_is_first_block_written(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_FIRST_BLOCK_WRITTEN); -} - -static inline bool f2fs_is_drop_cache(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_DROP_CACHE); -} - +static inline __le32 *get_dnode_addr(struct inode *inode, + struct page *node_page); static inline void *inline_data_addr(struct inode *inode, struct page *page) { - struct f2fs_inode *ri = F2FS_INODE(page); - int extra_size = get_extra_isize(inode); + __le32 *addr = get_dnode_addr(inode, page); - return (void *)&(ri->i_addr[extra_size + DEF_INLINE_RESERVED_SIZE]); + return (void *)(addr + DEF_INLINE_RESERVED_SIZE); } static inline int f2fs_has_inline_dentry(struct inode *inode) @@ -3401,6 +3402,17 @@ static inline int get_inline_xattr_addrs(struct inode *inode) return F2FS_I(inode)->i_inline_xattr_size; } +static inline __le32 *get_dnode_addr(struct inode *inode, + struct page *node_page) +{ + int base = 0; + + if (IS_INODE(node_page) && f2fs_has_extra_attr(inode)) + base = get_extra_isize(inode); + + return blkaddr_in_node(F2FS_NODE(node_page)) + base; +} + #define f2fs_get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) @@ -3765,6 +3777,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); +bool f2fs_is_cp_guaranteed(struct page *page); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_bio(struct f2fs_sb_info *sbi, @@ -3783,7 +3796,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio); struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, sector_t *sector); int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr); -void f2fs_set_data_blkaddr(struct dnode_of_data *dn); +void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); int f2fs_reserve_new_block(struct dnode_of_data *dn); @@ -4223,6 +4236,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock); void f2fs_compress_write_end_io(struct bio *bio, struct page *page); bool f2fs_is_compress_backend_ready(struct inode *inode); +bool f2fs_is_compress_level_valid(int alg, int lvl); int f2fs_init_compress_mempool(void); void f2fs_destroy_compress_mempool(void); void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task); @@ -4249,7 +4263,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, bool in_task); void f2fs_put_page_dic(struct page *page, bool in_task); -unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn); +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, + unsigned int ofs_in_node); int f2fs_init_compress_ctx(struct compress_ctx *cc); void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); void f2fs_init_compress_info(struct f2fs_sb_info *sbi); @@ -4287,6 +4302,7 @@ static inline bool f2fs_is_compress_backend_ready(struct inode *inode) /* not support compression */ return false; } +static inline bool f2fs_is_compress_level_valid(int alg, int lvl) { return false; } static inline struct page *f2fs_compress_control_page(struct page *page) { WARN_ON_ONCE(1); @@ -4305,7 +4321,8 @@ static inline void f2fs_put_page_dic(struct page *page, bool in_task) { WARN_ON_ONCE(1); } -static inline unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) { return 0; } +static inline unsigned int f2fs_cluster_blocks_are_contiguous( + struct dnode_of_data *dn, unsigned int ofs_in_node) { return 0; } static inline bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) { return false; } static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { } @@ -4362,15 +4379,24 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - if (!f2fs_compressed_file(inode)) + f2fs_down_write(&F2FS_I(inode)->i_sem); + + if (!f2fs_compressed_file(inode)) { + f2fs_up_write(&F2FS_I(inode)->i_sem); return true; - if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) + } + if (f2fs_is_mmap_file(inode) || + (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { + f2fs_up_write(&F2FS_I(inode)->i_sem); return false; + } fi->i_flags &= ~F2FS_COMPR_FL; stat_dec_compr_inode(inode); clear_inode_flag(inode, FI_COMPRESSED_FILE); f2fs_mark_inode_dirty_sync(inode, true); + + f2fs_up_write(&F2FS_I(inode)->i_sem); return true; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7c486d5e5e53..2ea9c021f15a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -581,20 +581,14 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - struct f2fs_node *raw_node; int nr_free = 0, ofs = dn->ofs_in_node, len = count; __le32 *addr; - int base = 0; bool compressed_cluster = false; int cluster_index = 0, valid_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks); - if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) - base = get_extra_isize(dn->inode); - - raw_node = F2FS_NODE(dn->node_page); - addr = blkaddr_in_node(raw_node) + base + ofs; + addr = get_dnode_addr(dn->inode, dn->node_page) + ofs; /* Assumption: truncateion starts with cluster */ for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { @@ -612,8 +606,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) if (blkaddr == NULL_ADDR) continue; - dn->data_blkaddr = NULL_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, NULL_ADDR); if (__is_valid_data_blkaddr(blkaddr)) { if (!f2fs_is_valid_blkaddr(sbi, blkaddr, @@ -623,9 +616,6 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) valid_blocks++; } - if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) - clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); - f2fs_invalidate_blocks(sbi, blkaddr); if (!released || blkaddr != COMPRESS_ADDR) @@ -1347,6 +1337,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE); set_page_dirty(pdst); + set_page_private_gcing(pdst); f2fs_put_page(pdst, 1); f2fs_put_page(psrc, 1); @@ -1517,8 +1508,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, } f2fs_invalidate_blocks(sbi, dn->data_blkaddr); - dn->data_blkaddr = NEW_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, NEW_ADDR); } f2fs_update_read_extent_cache_range(dn, start, 0, index - start); @@ -3469,8 +3459,7 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) if (blkaddr != NEW_ADDR) continue; - dn->data_blkaddr = NULL_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, NULL_ADDR); } f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false); @@ -3494,7 +3483,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) int ret; int writecount; - if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; if (!f2fs_compressed_file(inode)) @@ -3507,7 +3496,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (ret) return ret; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); inode_lock(inode); @@ -3593,10 +3582,10 @@ out: return ret; } -static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) +static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, + unsigned int *reserved_blocks) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - unsigned int reserved_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; block_t blkaddr; int i; @@ -3619,41 +3608,53 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) blkcnt_t reserved; int ret; - for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { - blkaddr = f2fs_data_blkaddr(dn); + for (i = 0; i < cluster_size; i++) { + blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + i); if (i == 0) { - if (blkaddr == COMPRESS_ADDR) - continue; - dn->ofs_in_node += cluster_size; - goto next; - } - - if (__is_valid_data_blkaddr(blkaddr)) { - compr_blocks++; + if (blkaddr != COMPRESS_ADDR) { + dn->ofs_in_node += cluster_size; + goto next; + } continue; } - dn->data_blkaddr = NEW_ADDR; - f2fs_set_data_blkaddr(dn); + /* + * compressed cluster was not released due to it + * fails in release_compress_blocks(), so NEW_ADDR + * is a possible case. + */ + if (blkaddr == NEW_ADDR || + __is_valid_data_blkaddr(blkaddr)) { + compr_blocks++; + continue; + } } reserved = cluster_size - compr_blocks; - ret = inc_valid_block_count(sbi, dn->inode, &reserved); - if (ret) + + /* for the case all blocks in cluster were reserved */ + if (reserved == 1) + goto next; + + ret = inc_valid_block_count(sbi, dn->inode, &reserved, false); + if (unlikely(ret)) return ret; - if (reserved != cluster_size - compr_blocks) - return -ENOSPC; + for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { + if (f2fs_data_blkaddr(dn) == NULL_ADDR) + f2fs_set_data_blkaddr(dn, NEW_ADDR); + } f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); - reserved_blocks += reserved; + *reserved_blocks += reserved; next: count -= cluster_size; } - return reserved_blocks; + return 0; } static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) @@ -3664,7 +3665,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) unsigned int reserved_blocks = 0; int ret; - if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; if (!f2fs_compressed_file(inode)) @@ -3677,10 +3678,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (ret) return ret; - if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) - goto out; - - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); inode_lock(inode); @@ -3689,6 +3687,9 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) goto unlock_inode; } + if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) + goto unlock_inode; + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); @@ -3714,7 +3715,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); count = round_up(count, F2FS_I(inode)->i_cluster_size); - ret = reserve_compress_blocks(&dn, count); + ret = reserve_compress_blocks(&dn, count, &reserved_blocks); f2fs_put_dnode(&dn); @@ -3722,23 +3723,21 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) break; page_idx += count; - reserved_blocks += ret; } filemap_invalidate_unlock(inode->i_mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (ret >= 0) { + if (!ret) { clear_inode_flag(inode, FI_COMPRESS_RELEASED); inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, true); } unlock_inode: inode_unlock(inode); -out: mnt_drop_write_file(filp); - if (ret >= 0) { + if (!ret) { ret = put_user(reserved_blocks, (u64 __user *)arg); } else if (reserved_blocks && atomic_read(&F2FS_I(inode)->i_compr_blocks)) { @@ -3987,16 +3986,20 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) sizeof(option))) return -EFAULT; - if (!f2fs_compressed_file(inode) || - option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || - option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || - option.algorithm >= COMPRESS_MAX) + if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || + option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || + option.algorithm >= COMPRESS_MAX) return -EINVAL; file_start_write(filp); inode_lock(inode); f2fs_down_write(&F2FS_I(inode)->i_sem); + if (!f2fs_compressed_file(inode)) { + ret = -EINVAL; + goto out; + } + if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { ret = -EBUSY; goto out; @@ -4058,6 +4061,7 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) f2fs_bug_on(F2FS_I_SB(inode), !page); set_page_dirty(page); + set_page_private_gcing(page); f2fs_put_page(page, 1); f2fs_put_page(page, 0); } @@ -4085,7 +4089,7 @@ static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg) if (!f2fs_compressed_file(inode)) return -EINVAL; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); file_start_write(filp); inode_lock(inode); @@ -4157,7 +4161,7 @@ static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg) if (!f2fs_compressed_file(inode)) return -EINVAL; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); file_start_write(filp); inode_lock(inode); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ec7212f7a9b7..d4662ccb94c8 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1187,8 +1187,8 @@ static int ra_data_block(struct inode *inode, pgoff_t index) .op = REQ_OP_READ, .op_flags = 0, .encrypted_page = NULL, - .in_list = false, - .retry = false, + .in_list = 0, + .retry = 0, }; int err; @@ -1276,8 +1276,8 @@ static int move_data_block(struct inode *inode, block_t bidx, .op = REQ_OP_READ, .op_flags = 0, .encrypted_page = NULL, - .in_list = false, - .retry = false, + .in_list = 0, + .retry = 0, }; struct dnode_of_data dn; struct f2fs_summary sum; @@ -1410,8 +1410,6 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_update_data_blkaddr(&dn, newaddr); set_inode_flag(inode, FI_APPEND_WRITE); - if (page->index == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); put_page_out: f2fs_put_page(fio.encrypted_page, 1); recover_block: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0010579f1736..869bb6ec107c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -59,49 +59,31 @@ void f2fs_set_inode_flags(struct inode *inode) S_ENCRYPTED|S_VERITY|S_CASEFOLD); } -static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) +static void __get_inode_rdev(struct inode *inode, struct page *node_page) { - int extra_size = get_extra_isize(inode); + __le32 *addr = get_dnode_addr(inode, node_page); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { - if (ri->i_addr[extra_size]) - inode->i_rdev = old_decode_dev( - le32_to_cpu(ri->i_addr[extra_size])); + if (addr[0]) + inode->i_rdev = old_decode_dev(le32_to_cpu(addr[0])); else - inode->i_rdev = new_decode_dev( - le32_to_cpu(ri->i_addr[extra_size + 1])); + inode->i_rdev = new_decode_dev(le32_to_cpu(addr[1])); } } -static int __written_first_block(struct f2fs_sb_info *sbi, - struct f2fs_inode *ri) +static void __set_inode_rdev(struct inode *inode, struct page *node_page) { - block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); - - if (!__is_valid_data_blkaddr(addr)) - return 1; - if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) { - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); - return -EFSCORRUPTED; - } - return 0; -} - -static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) -{ - int extra_size = get_extra_isize(inode); + __le32 *addr = get_dnode_addr(inode, node_page); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { if (old_valid_dev(inode->i_rdev)) { - ri->i_addr[extra_size] = - cpu_to_le32(old_encode_dev(inode->i_rdev)); - ri->i_addr[extra_size + 1] = 0; + addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev)); + addr[1] = 0; } else { - ri->i_addr[extra_size] = 0; - ri->i_addr[extra_size + 1] = - cpu_to_le32(new_encode_dev(inode->i_rdev)); - ri->i_addr[extra_size + 2] = 0; + addr[0] = 0; + addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev)); + addr[2] = 0; } } } @@ -336,7 +318,6 @@ static int do_read_inode(struct inode *inode) struct page *node_page; struct f2fs_inode *ri; projid_t i_projid; - int err; /* Check if ino is within scope */ if (f2fs_check_nid_range(sbi, inode->i_ino)) @@ -415,17 +396,7 @@ static int do_read_inode(struct inode *inode) } /* get rdev by using inline_info */ - __get_inode_rdev(inode, ri); - - if (S_ISREG(inode->i_mode)) { - err = __written_first_block(sbi, ri); - if (err < 0) { - f2fs_put_page(node_page, 1); - return err; - } - if (!err) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); - } + __get_inode_rdev(inode, node_page); if (!f2fs_need_inode_block_update(sbi, inode->i_ino)) fi->last_disk_size = inode->i_size; @@ -697,7 +668,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) } } - __set_inode_rdev(inode, ri); + __set_inode_rdev(inode, node_page); /* deleted inode */ if (inode->i_nlink == 0) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c6d0e0709632..fcf22a50ff5d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -850,21 +850,29 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) && f2fs_sb_has_readonly(sbi)) { - unsigned int c_len = f2fs_cluster_blocks_are_contiguous(dn); + unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size; + unsigned int ofs_in_node = dn->ofs_in_node; + pgoff_t fofs = index; + unsigned int c_len; block_t blkaddr; + /* should align fofs and ofs_in_node to cluster_size */ + if (fofs % cluster_size) { + fofs = round_down(fofs, cluster_size); + ofs_in_node = round_down(ofs_in_node, cluster_size); + } + + c_len = f2fs_cluster_blocks_are_contiguous(dn, ofs_in_node); if (!c_len) goto out; - blkaddr = f2fs_data_blkaddr(dn); + blkaddr = data_blkaddr(dn->inode, dn->node_page, ofs_in_node); if (blkaddr == COMPRESS_ADDR) blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + 1); + ofs_in_node + 1); f2fs_update_read_extent_tree_range_compressed(dn->inode, - index, blkaddr, - F2FS_I(dn->inode)->i_cluster_size, - c_len); + fofs, blkaddr, cluster_size, c_len); } out: return 0; @@ -1587,7 +1595,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, .op_flags = wbc_to_write_flags(wbc), .page = page, .encrypted_page = NULL, - .submitted = false, + .submitted = 0, .io_type = io_type, .io_wbc = wbc, }; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index dea95b48b647..f5efc37a2b51 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -582,6 +582,19 @@ truncate_out: return 0; } +static int f2fs_reserve_new_block_retry(struct dnode_of_data *dn) +{ + int i, err = 0; + + for (i = DEFAULT_FAILURE_RETRY_COUNT; i > 0; i--) { + err = f2fs_reserve_new_block(dn); + if (!err) + break; + } + + return err; +} + static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page) { @@ -683,20 +696,17 @@ retry_dn: */ if (dest == NEW_ADDR) { f2fs_truncate_data_blocks_range(&dn, 1); - f2fs_reserve_new_block(&dn); + + err = f2fs_reserve_new_block_retry(&dn); + if (err) + goto err; continue; } /* dest is valid block, try to recover from src to dest */ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { - if (src == NULL_ADDR) { - err = f2fs_reserve_new_block(&dn); - while (err && - IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) - err = f2fs_reserve_new_block(&dn); - /* We should not get -ENOSPC */ - f2fs_bug_on(sbi, err); + err = f2fs_reserve_new_block_retry(&dn); if (err) goto err; } @@ -887,6 +897,8 @@ skip: if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) && f2fs_sb_has_blkzoned(sbi)) { err = f2fs_fix_curseg_write_pointer(sbi); + if (!err) + err = f2fs_check_write_pointer(sbi); ret = err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 16bf9d5c8d4f..205216c1db91 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -192,6 +192,9 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) if (!f2fs_is_atomic_file(inode)) return; + if (clean) + truncate_inode_pages_final(inode->i_mapping); + release_atomic_write_cnt(inode); clear_inode_flag(inode, FI_ATOMIC_COMMITTED); clear_inode_flag(inode, FI_ATOMIC_FILE); @@ -200,7 +203,6 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) F2FS_I(inode)->atomic_write_task = NULL; if (clean) { - truncate_inode_pages_final(inode->i_mapping); f2fs_i_size_write(inode, fi->original_i_size); fi->original_i_size = 0; } @@ -247,7 +249,7 @@ retry: } else { blkcnt_t count = 1; - err = inc_valid_block_count(sbi, inode, &count); + err = inc_valid_block_count(sbi, inode, &count, true); if (err) { f2fs_put_dnode(&dn); return err; @@ -3312,10 +3314,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_bio_info *io; if (F2FS_IO_ALIGNED(sbi)) - fio->retry = false; + fio->retry = 0; INIT_LIST_HEAD(&fio->list); - fio->in_list = true; + fio->in_list = 1; io = sbi->write_io[fio->type] + fio->temp; spin_lock(&io->io_lock); list_add_tail(&fio->list, &io->io_list); @@ -3396,7 +3398,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, .new_blkaddr = page->index, .page = page, .encrypted_page = NULL, - .in_list = false, + .in_list = 0, }; if (unlikely(page->index >= MAIN_BLKADDR(sbi))) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index f3951e8ad394..aa9ad85e0901 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -586,23 +586,22 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi, unsigned int node_blocks, unsigned int dent_blocks) { - unsigned int segno, left_blocks; + unsigned segno, left_blocks; int i; - /* check current node segment */ + /* check current node sections in the worst case. */ for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) { segno = CURSEG_I(sbi, i)->segno; - left_blocks = f2fs_usable_blks_in_seg(sbi, segno) - - get_seg_entry(sbi, segno)->ckpt_valid_blocks; - + left_blocks = CAP_BLKS_PER_SEC(sbi) - + get_ckpt_valid_blocks(sbi, segno, true); if (node_blocks > left_blocks) return false; } - /* check current data segment */ + /* check current data section for dentry blocks. */ segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno; - left_blocks = f2fs_usable_blks_in_seg(sbi, segno) - - get_seg_entry(sbi, segno)->ckpt_valid_blocks; + left_blocks = CAP_BLKS_PER_SEC(sbi) - + get_ckpt_valid_blocks(sbi, segno, true); if (dent_blocks > left_blocks) return false; return true; @@ -651,7 +650,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, if (free_secs > upper_secs) return false; - else if (free_secs <= lower_secs) + if (free_secs <= lower_secs) return true; return !curseg_space; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e023857b91d6..3502f2ed7d22 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -629,7 +629,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) if (kstrtouint(str + 1, 10, &level)) return -EINVAL; - if (level < LZ4HC_MIN_CLEVEL || level > LZ4HC_MAX_CLEVEL) { + if (!f2fs_is_compress_level_valid(COMPRESS_LZ4, level)) { f2fs_info(sbi, "invalid lz4hc compress level: %d", level); return -EINVAL; } @@ -650,7 +650,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) #ifdef CONFIG_F2FS_FS_ZSTD static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) { - unsigned int level; + int level; int len = 4; if (strlen(str) == len) { @@ -664,10 +664,16 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) f2fs_info(sbi, "wrong format, e.g. :"); return -EINVAL; } - if (kstrtouint(str + 1, 10, &level)) + if (kstrtoint(str + 1, 10, &level)) return -EINVAL; - if (level < zstd_min_clevel() || level > zstd_max_clevel()) { + /* f2fs does not support negative compress level now */ + if (level < 0) { + f2fs_info(sbi, "do not support negative compress level: %d", level); + return -ERANGE; + } + + if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) { f2fs_info(sbi, "invalid zstd compress level: %d", level); return -EINVAL; } diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index af191371c352..bab63eeaf9cb 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -130,6 +130,12 @@ fat_encode_fh_nostale(struct inode *inode, __u32 *fh, int *lenp, fid->parent_i_gen = parent->i_generation; type = FILEID_FAT_WITH_PARENT; *lenp = FAT_FID_SIZE_WITH_PARENT; + } else { + /* + * We need to initialize this field because the fh is actually + * 12 bytes long + */ + fid->parent_i_pos_hi = 0; } return type; diff --git a/fs/fcntl.c b/fs/fcntl.c index 146c9ab0cd4b..0964e5dbf0ca 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -267,7 +267,7 @@ static int f_getowner_uids(struct file *filp, unsigned long arg) } #endif -static bool rw_hint_valid(enum rw_hint hint) +static bool rw_hint_valid(u64 hint) { switch (hint) { case RWH_WRITE_LIFE_NOT_SET: @@ -287,19 +287,17 @@ static long fcntl_rw_hint(struct file *file, unsigned int cmd, { struct inode *inode = file_inode(file); u64 __user *argp = (u64 __user *)arg; - enum rw_hint hint; - u64 h; + u64 hint; switch (cmd) { case F_GET_RW_HINT: - h = inode->i_write_hint; - if (copy_to_user(argp, &h, sizeof(*argp))) + hint = inode->i_write_hint; + if (copy_to_user(argp, &hint, sizeof(*argp))) return -EFAULT; return 0; case F_SET_RW_HINT: - if (copy_from_user(&h, argp, sizeof(h))) + if (copy_from_user(&hint, argp, sizeof(hint))) return -EFAULT; - hint = (enum rw_hint) h; if (!rw_hint_valid(hint)) return -EINVAL; diff --git a/fs/fhandle.c b/fs/fhandle.c index f2bc27d1975e..a8c25557c8c1 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -37,7 +37,7 @@ static long do_sys_name_to_handle(const struct path *path, if (f_handle.handle_bytes > MAX_HANDLE_SZ) return -EINVAL; - handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, + handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes, GFP_KERNEL); if (!handle) return -ENOMEM; diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index d645f8b302a2..9397ed39b0b4 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -179,13 +179,14 @@ EXPORT_SYMBOL(fscache_acquire_cache); void fscache_put_cache(struct fscache_cache *cache, enum fscache_cache_trace where) { - unsigned int debug_id = cache->debug_id; + unsigned int debug_id; bool zero; int ref; if (IS_ERR_OR_NULL(cache)) return; + debug_id = cache->debug_id; zero = __refcount_dec_and_test(&cache->ref, &ref); trace_fscache_cache(debug_id, ref - 1, where); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5e408e7ec4c6..936a24b646ce 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -399,6 +399,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name goto out_put_forget; if (fuse_invalid_attr(&outarg->attr)) goto out_put_forget; + if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) { + pr_warn_once("root generation should be zero\n"); + outarg->generation = 0; + } *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, &outarg->attr, entry_attr_timeout(outarg), diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index a9681fecbd91..253b9b78d6f1 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -923,7 +923,6 @@ static inline bool fuse_stale_inode(const struct inode *inode, int generation, static inline void fuse_make_bad(struct inode *inode) { - remove_inode_hash(inode); set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f81000d96887..367e3b276092 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -445,8 +445,11 @@ retry: } else if (fuse_stale_inode(inode, generation, attr)) { /* nodeid was reused, any I/O on the old inode should fail */ fuse_make_bad(inode); - iput(inode); - goto retry; + if (inode != d_inode(sb->s_root)) { + remove_inode_hash(inode); + iput(inode); + goto retry; + } } fi = get_fuse_inode(inode); spin_lock(&fi->lock); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8eea709e3659..4fe4b3393e71 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -123,6 +123,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) loff_t len, vma_len; int ret; struct hstate *h = hstate_file(file); + vm_flags_t vm_flags; /* * vma address alignment (but not the pgoff alignment) has @@ -164,10 +165,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); ret = -ENOMEM; + + vm_flags = vma->vm_flags; + /* + * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip + * reserving here. Note: only for SHM hugetlbfs file, the inode + * flag S_PRIVATE is set. + */ + if (inode->i_flags & S_PRIVATE) + vm_flags |= VM_NORESERVE; + if (!hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), len >> huge_page_shift(h), vma, - vma->vm_flags)) + vm_flags)) goto out; ret = 0; @@ -1350,6 +1361,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par { struct hugetlbfs_fs_context *ctx = fc->fs_private; struct fs_parse_result result; + struct hstate *h; char *rest; unsigned long ps; int opt; @@ -1394,11 +1406,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par case Opt_pagesize: ps = memparse(param->string, &rest); - ctx->hstate = size_to_hstate(ps); - if (!ctx->hstate) { + h = size_to_hstate(ps); + if (!h) { pr_err("Unsupported page size %lu MB\n", ps / SZ_1M); return -EINVAL; } + ctx->hstate = h; return 0; case Opt_min_size: diff --git a/fs/ioctl.c b/fs/ioctl.c index 80ac36aea913..2bd05509295a 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -916,8 +916,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, if (!f.file) return -EBADF; - /* RED-PEN how should LSM module know it's handling 32bit? */ - error = security_file_ioctl(f.file, cmd, arg); + error = security_file_ioctl_compat(f.file, cmd, arg); if (error) goto out; diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 4d56f6081a5d..4462274e325a 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -63,10 +63,10 @@ */ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, int nblocks); -static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval); -static int dbBackSplit(dmtree_t * tp, int leafno); -static int dbJoin(dmtree_t * tp, int leafno, int newval); -static void dbAdjTree(dmtree_t * tp, int leafno, int newval); +static void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl); +static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl); +static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl); +static void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl); static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level); static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results); @@ -2103,7 +2103,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, * system. */ if (dp->tree.stree[word] == NOFREE) - dbBackSplit((dmtree_t *) & dp->tree, word); + dbBackSplit((dmtree_t *)&dp->tree, word, false); dbAllocBits(bmp, dp, blkno, nblocks); } @@ -2189,7 +2189,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * the binary system of the leaves if need be. */ dbSplit(tp, word, BUDMIN, - dbMaxBud((u8 *) & dp->wmap[word])); + dbMaxBud((u8 *)&dp->wmap[word]), false); word += 1; } else { @@ -2229,7 +2229,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * system of the leaves to reflect the current * allocation (size). */ - dbSplit(tp, word, size, NOFREE); + dbSplit(tp, word, size, NOFREE, false); /* get the number of dmap words handled */ nw = BUDSIZE(size, BUDMIN); @@ -2336,7 +2336,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, /* update the leaf for this dmap word. */ rc = dbJoin(tp, word, - dbMaxBud((u8 *) & dp->wmap[word])); + dbMaxBud((u8 *)&dp->wmap[word]), false); if (rc) return rc; @@ -2369,7 +2369,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, /* update the leaf. */ - rc = dbJoin(tp, word, size); + rc = dbJoin(tp, word, size, false); if (rc) return rc; @@ -2521,16 +2521,16 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) * that it is at the front of a binary buddy system. */ if (oldval == NOFREE) { - rc = dbBackSplit((dmtree_t *) dcp, leafno); + rc = dbBackSplit((dmtree_t *)dcp, leafno, true); if (rc) { release_metapage(mp); return rc; } oldval = dcp->stree[ti]; } - dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval); + dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval, true); } else { - rc = dbJoin((dmtree_t *) dcp, leafno, newval); + rc = dbJoin((dmtree_t *) dcp, leafno, newval, true); if (rc) { release_metapage(mp); return rc; @@ -2561,7 +2561,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) */ if (alloc) { dbJoin((dmtree_t *) dcp, leafno, - oldval); + oldval, true); } else { /* the dbJoin() above might have * caused a larger binary buddy system @@ -2571,9 +2571,9 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) */ if (dcp->stree[ti] == NOFREE) dbBackSplit((dmtree_t *) - dcp, leafno); + dcp, leafno, true); dbSplit((dmtree_t *) dcp, leafno, - dcp->budmin, oldval); + dcp->budmin, oldval, true); } /* release the buffer and return the error. @@ -2621,7 +2621,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ -static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) +static void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl) { int budsz; int cursz; @@ -2643,7 +2643,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) while (cursz >= splitsz) { /* update the buddy's leaf with its new value. */ - dbAdjTree(tp, leafno ^ budsz, cursz); + dbAdjTree(tp, leafno ^ budsz, cursz, is_ctl); /* on to the next size and buddy. */ @@ -2655,7 +2655,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) /* adjust the dmap tree to reflect the specified leaf's new * value. */ - dbAdjTree(tp, leafno, newval); + dbAdjTree(tp, leafno, newval, is_ctl); } @@ -2686,7 +2686,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ -static int dbBackSplit(dmtree_t * tp, int leafno) +static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl) { int budsz, bud, w, bsz, size; int cursz; @@ -2737,7 +2737,7 @@ static int dbBackSplit(dmtree_t * tp, int leafno) * system in two. */ cursz = leaf[bud] - 1; - dbSplit(tp, bud, cursz, cursz); + dbSplit(tp, bud, cursz, cursz, is_ctl); break; } } @@ -2765,7 +2765,7 @@ static int dbBackSplit(dmtree_t * tp, int leafno) * * RETURN VALUES: none */ -static int dbJoin(dmtree_t * tp, int leafno, int newval) +static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl) { int budsz, buddy; s8 *leaf; @@ -2820,12 +2820,12 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval) if (leafno < buddy) { /* leafno is the left buddy. */ - dbAdjTree(tp, buddy, NOFREE); + dbAdjTree(tp, buddy, NOFREE, is_ctl); } else { /* buddy is the left buddy and becomes * leafno. */ - dbAdjTree(tp, leafno, NOFREE); + dbAdjTree(tp, leafno, NOFREE, is_ctl); leafno = buddy; } @@ -2838,7 +2838,7 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval) /* update the leaf value. */ - dbAdjTree(tp, leafno, newval); + dbAdjTree(tp, leafno, newval, is_ctl); return 0; } @@ -2859,15 +2859,20 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval) * * RETURN VALUES: none */ -static void dbAdjTree(dmtree_t * tp, int leafno, int newval) +static void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl) { int lp, pp, k; - int max; + int max, size; + + size = is_ctl ? CTLTREESIZE : TREESIZE; /* pick up the index of the leaf for this leafno. */ lp = leafno + le32_to_cpu(tp->dmt_leafidx); + if (WARN_ON_ONCE(lp >= size || lp < 0)) + return; + /* is the current value the same as the old value ? if so, * there is nothing to do. */ diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 92b7c533407c..031d8f570f58 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -633,6 +633,11 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) { index = base + (lim >> 1); + if (stbl[index] < 0) { + rc = -EIO; + goto out; + } + if (p->header.flag & BT_LEAF) { /* uppercase leaf name to compare */ cmp = @@ -1970,7 +1975,7 @@ static int dtSplitRoot(tid_t tid, do { f = &rp->slot[fsi]; fsi = f->next; - } while (fsi != -1); + } while (fsi >= 0); f->next = n; } diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 6ed2e1d4c894..ac42f8ee553f 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -2179,6 +2179,9 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) /* get the ag and iag numbers for this iag. */ agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); + if (agno >= MAXAG || agno < 0) + return -EIO; + iagno = le32_to_cpu(iagp->iagnum); /* check if this is the last free extent within the diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 48d1f70f786c..21d8d4a5c67a 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c @@ -172,15 +172,15 @@ int jfs_mount(struct super_block *sb) } jfs_info("jfs_mount: ipimap:0x%p", ipimap); - /* map further access of per fileset inodes by the fileset inode */ - sbi->ipimap = ipimap; - /* initialize fileset inode allocation map */ if ((rc = diMount(ipimap))) { jfs_err("jfs_mount: diMount failed w/rc = %d", rc); goto err_ipimap; } + /* map further access of per fileset inodes by the fileset inode */ + sbi->ipimap = ipimap; + return rc; /* diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 44842e6cf0a9..a00e11ebfa77 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -669,6 +669,18 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, { struct kernfs_node *kn; + if (parent->mode & S_ISGID) { + /* this code block imitates inode_init_owner() for + * kernfs + */ + + if (parent->iattr) + gid = parent->iattr->ia_gid; + + if (flags & KERNFS_DIR) + mode |= S_ISGID; + } + kn = __kernfs_new_node(kernfs_root(parent), parent, name, mode, uid, gid, flags); if (kn) { diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 284b019cb652..b72023a6b4c1 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -52,6 +52,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, *filp = file; /* Set up the missing parts of the file_lock structure */ + lock->fl.fl_flags = FL_POSIX; lock->fl.fl_file = file->f_file[mode]; lock->fl.fl_pid = current->tgid; lock->fl.fl_start = (loff_t)lock->lock_start; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 9c1aa75441e1..4e30f3c50970 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -659,11 +659,13 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock) nlmsvc_cancel_blocked(net, file, lock); lock->fl.fl_type = F_UNLCK; - if (file->f_file[O_RDONLY]) - error = vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, + lock->fl.fl_file = file->f_file[O_RDONLY]; + if (lock->fl.fl_file) + error = vfs_lock_file(lock->fl.fl_file, F_SETLK, &lock->fl, NULL); - if (file->f_file[O_WRONLY]) - error = vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, + lock->fl.fl_file = file->f_file[O_WRONLY]; + if (lock->fl.fl_file) + error |= vfs_lock_file(lock->fl.fl_file, F_SETLK, &lock->fl, NULL); return (error < 0)? nlm_lck_denied_nolocks : nlm_granted; @@ -697,9 +699,10 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l block = nlmsvc_lookup_block(file, lock); mutex_unlock(&file->f_mutex); if (block != NULL) { - mode = lock_to_openmode(&lock->fl); - vfs_cancel_lock(block->b_file->f_file[mode], - &block->b_call->a_args.lock.fl); + struct file_lock *fl = &block->b_call->a_args.lock.fl; + + mode = lock_to_openmode(fl); + vfs_cancel_lock(block->b_file->f_file[mode], fl); status = nlmsvc_unlink_block(block); nlmsvc_release_block(block); } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index e35c05e27806..32784f508c81 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -77,6 +77,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ mode = lock_to_openmode(&lock->fl); + lock->fl.fl_flags = FL_POSIX; lock->fl.fl_file = file->f_file[mode]; lock->fl.fl_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 3515f17eaf3f..e3b6229e7ae5 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -210,7 +210,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); struct nlm_host *lockhost; if (!flctx || list_empty_careful(&flctx->flc_posix)) @@ -265,7 +265,7 @@ nlm_file_inuse(struct nlm_file *file) { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) return 1; diff --git a/fs/locks.c b/fs/locks.c index 1047ab2b15e9..7d0918b8fe5d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -175,7 +175,7 @@ locks_get_lock_context(struct inode *inode, int type) struct file_lock_context *ctx; /* paired with cmpxchg() below */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (likely(ctx) || type == F_UNLCK) goto out; @@ -194,7 +194,7 @@ locks_get_lock_context(struct inode *inode, int type) */ if (cmpxchg(&inode->i_flctx, NULL, ctx)) { kmem_cache_free(flctx_cache, ctx); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); } out: trace_locks_get_lock_context(inode, type, ctx); @@ -247,7 +247,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list, void locks_free_lock_context(struct inode *inode) { - struct file_lock_context *ctx = inode->i_flctx; + struct file_lock_context *ctx = locks_inode_context(inode); if (unlikely(ctx)) { locks_check_ctx_lists(inode); @@ -891,7 +891,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) void *owner; void (*func)(void); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx || list_empty_careful(&ctx->flc_posix)) { fl->fl_type = F_UNLCK; return; @@ -1483,7 +1483,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) new_fl->fl_flags = type; /* typically we will check that ctx is non-NULL before calling */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) { WARN_ON_ONCE(1); goto free_lock; @@ -1588,7 +1588,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time) struct file_lock_context *ctx; struct file_lock *fl; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { spin_lock(&ctx->flc_lock); fl = list_first_entry_or_null(&ctx->flc_lease, @@ -1634,7 +1634,7 @@ int fcntl_getlease(struct file *filp) int type = F_UNLCK; LIST_HEAD(dispose); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); @@ -1823,7 +1823,7 @@ static int generic_delete_lease(struct file *filp, void *owner) struct file_lock_context *ctx; LIST_HEAD(dispose); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) { trace_generic_delete_lease(inode, NULL); return error; @@ -2562,7 +2562,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx || list_empty(&ctx->flc_posix)) return; @@ -2635,7 +2635,7 @@ void locks_remove_file(struct file *filp) { struct file_lock_context *ctx; - ctx = smp_load_acquire(&locks_inode(filp)->i_flctx); + ctx = locks_inode_context(locks_inode(filp)); if (!ctx) return; @@ -2682,7 +2682,7 @@ bool vfs_inode_has_locks(struct inode *inode) struct file_lock_context *ctx; bool ret; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) return false; @@ -2863,7 +2863,7 @@ void show_fd_locks(struct seq_file *f, struct file_lock_context *ctx; int id = 0; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) return; diff --git a/fs/namei.c b/fs/namei.c index b5578f4ce5d6..9e336b408ed1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3017,20 +3017,14 @@ static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) p = d_ancestor(p2, p1); if (p) { inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); - inode_lock_nested(p1->d_inode, I_MUTEX_CHILD); + inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2); return p; } p = d_ancestor(p1, p2); - if (p) { - inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); - inode_lock_nested(p2->d_inode, I_MUTEX_CHILD); - return p; - } - - lock_two_inodes(p1->d_inode, p2->d_inode, - I_MUTEX_PARENT, I_MUTEX_PARENT2); - return NULL; + inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); + inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); + return p; } /* @@ -4735,11 +4729,12 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * * a) we can get into loop creation. * b) race potential - two innocent renames can create a loop together. - * That's where 4.4 screws up. Current fix: serialization on + * That's where 4.4BSD screws up. Current fix: serialization on * sb->s_vfs_rename_mutex. We might be more accurate, but that's another * story. - * c) we have to lock _four_ objects - parents and victim (if it exists), - * and source. + * c) we may have to lock up to _four_ objects - parents and victim (if it exists), + * and source (if it's a non-directory or a subdirectory that moves to + * different parent). * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change @@ -4771,6 +4766,7 @@ int vfs_rename(struct renamedata *rd) bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; struct name_snapshot old_name; + bool lock_old_subdir, lock_new_subdir; if (source == target) return 0; @@ -4824,15 +4820,32 @@ int vfs_rename(struct renamedata *rd) take_dentry_name_snapshot(&old_name, old_dentry); dget(new_dentry); /* - * Lock all moved children. Moved directories may need to change parent - * pointer so they need the lock to prevent against concurrent - * directory changes moving parent pointer. For regular files we've - * historically always done this. The lockdep locking subclasses are - * somewhat arbitrary but RENAME_EXCHANGE in particular can swap - * regular files and directories so it's difficult to tell which - * subclasses to use. + * Lock children. + * The source subdirectory needs to be locked on cross-directory + * rename or cross-directory exchange since its parent changes. + * The target subdirectory needs to be locked on cross-directory + * exchange due to parent change and on any rename due to becoming + * a victim. + * Non-directories need locking in all cases (for NFS reasons); + * they get locked after any subdirectories (in inode address order). + * + * NOTE: WE ONLY LOCK UNRELATED DIRECTORIES IN CROSS-DIRECTORY CASE. + * NEVER, EVER DO THAT WITHOUT ->s_vfs_rename_mutex. */ - lock_two_inodes(source, target, I_MUTEX_NORMAL, I_MUTEX_NONDIR2); + lock_old_subdir = new_dir != old_dir; + lock_new_subdir = new_dir != old_dir || !(flags & RENAME_EXCHANGE); + if (is_dir) { + if (lock_old_subdir) + inode_lock_nested(source, I_MUTEX_CHILD); + if (target && (!new_is_dir || lock_new_subdir)) + inode_lock(target); + } else if (new_is_dir) { + if (lock_new_subdir) + inode_lock_nested(target, I_MUTEX_CHILD); + inode_lock(source); + } else { + lock_two_nondirectories(source, target); + } error = -EPERM; if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target))) @@ -4880,8 +4893,9 @@ int vfs_rename(struct renamedata *rd) d_exchange(old_dentry, new_dentry); } out: - inode_unlock(source); - if (target) + if (!is_dir || lock_old_subdir) + inode_unlock(source); + if (target && (!new_is_dir || lock_new_subdir)) inode_unlock(target); dput(new_dentry); if (!error) { diff --git a/fs/namespace.c b/fs/namespace.c index 29a8d90dd107..1533550f7356 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4172,10 +4172,15 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr) /* * If this is an attached mount make sure it's located in the callers * mount namespace. If it's not don't let the caller interact with it. - * If this is a detached mount make sure it has an anonymous mount - * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE. + * + * If this mount doesn't have a parent it's most often simply a + * detached mount with an anonymous mount namespace. IOW, something + * that's simply not attached yet. But there are apparently also users + * that do change mount properties on the rootfs itself. That obviously + * neither has a parent nor is it a detached mount so we cannot + * unconditionally check for detached mounts. */ - if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns))) + if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt)) goto out; /* diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8fdb65e1b14a..b555efca01d2 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -647,10 +647,17 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) LIST_HEAD(mds_list); nfs_init_cinfo_from_dreq(&cinfo, dreq); + nfs_commit_begin(cinfo.mds); nfs_scan_commit(dreq->inode, &mds_list, &cinfo); res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo); - if (res < 0) /* res == -ENOMEM */ - nfs_direct_write_reschedule(dreq); + if (res < 0) { /* res == -ENOMEM */ + spin_lock(&dreq->lock); + if (dreq->flags == 0) + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + spin_unlock(&dreq->lock); + } + if (nfs_commit_end(cinfo.mds)) + nfs_direct_write_complete(dreq); } static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq) diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 01596f2d0a1e..9fe9586a51b7 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -156,7 +156,10 @@ const struct export_operations nfs_export_ops = { .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, .fetch_iversion = nfs_fetch_iversion, - .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| - EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS| - EXPORT_OP_NOATOMIC_ATTR, + .flags = EXPORT_OP_NOWCC | + EXPORT_OP_NOSUBTREECHK | + EXPORT_OP_CLOSE_BEFORE_UNLINK | + EXPORT_OP_REMOTE_FS | + EXPORT_OP_NOATOMIC_ATTR | + EXPORT_OP_FLUSH_ON_CLOSE, }; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 81bbafab18a9..4376881be791 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2016,7 +2016,7 @@ static void ff_layout_cancel_io(struct pnfs_layout_segment *lseg) for (idx = 0; idx < flseg->mirror_array_cnt; idx++) { mirror = flseg->mirror_array[idx]; mirror_ds = mirror->mirror_ds; - if (!mirror_ds) + if (IS_ERR_OR_NULL(mirror_ds)) continue; ds = mirror->mirror_ds->ds; if (!ds) diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index b59876b01a1e..0282d93c8bcc 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -55,11 +55,14 @@ int nfs42_proc_removexattr(struct inode *inode, const char *name); * They would be 7 bytes long in the eventual buffer ("user.x\0"), and * 8 bytes long XDR-encoded. * - * Include the trailing eof word as well. + * Include the trailing eof word as well and make the result a multiple + * of 4 bytes. */ static inline u32 nfs42_listxattr_xdrsize(u32 buflen) { - return ((buflen / (XATTR_USER_PREFIX_LEN + 2)) * 8) + 4; + u32 size = 8 * buflen / (XATTR_USER_PREFIX_LEN + 2) + 4; + + return (size + 3) & ~3; } #endif /* CONFIG_NFS_V4_2 */ #endif /* __LINUX_FS_NFS_NFS4_2_H */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ec3f0103e1a7..7cc74f7451d6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10592,29 +10592,33 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) { ssize_t error, error2, error3; + size_t left = size; - error = generic_listxattr(dentry, list, size); + error = generic_listxattr(dentry, list, left); if (error < 0) return error; if (list) { list += error; - size -= error; + left -= error; } - error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size); + error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, left); if (error2 < 0) return error2; if (list) { list += error2; - size -= error2; + left -= error2; } - error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, size); + error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, left); if (error3 < 0) return error3; - return error + error2 + error3; + error += error2 + error3; + if (size && error > size) + return -ERANGE; + return error; } static void nfs4_enable_swap(struct inode *inode) diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 3fa77ad7258f..c8a57cfde64b 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -9,10 +9,10 @@ #define _TRACE_NFS4_H #include -#include +#include -#include -#include +#include +#include #define show_nfs_fattr_flags(valid) \ __print_flags((unsigned long)valid, "|", \ diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 620329b7e6ae..0b1c1d2e076c 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -175,10 +175,10 @@ static int __init root_nfs_cat(char *dest, const char *src, size_t len = strlen(dest); if (len && dest[len - 1] != ',') - if (strlcat(dest, ",", destlen) > destlen) + if (strlcat(dest, ",", destlen) >= destlen) return -1; - if (strlcat(dest, src, destlen) > destlen) + if (strlcat(dest, src, destlen) >= destlen) return -1; return 0; } diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 8c6cc58679ff..642f6921852f 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -11,9 +11,9 @@ #include #include -#include -#include -#include +#include +#include +#include #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f41d24b54fd1..8e21caae4cae 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -667,8 +667,10 @@ static int nfs_writepage_locked(struct page *page, int err; if (wbc->sync_mode == WB_SYNC_NONE && - NFS_SERVER(inode)->write_congested) + NFS_SERVER(inode)->write_congested) { + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; + } nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_pageio_init_write(&pgio, inode, 0, @@ -1654,7 +1656,7 @@ static int wait_on_commit(struct nfs_mds_commit_info *cinfo) !atomic_read(&cinfo->rpcs_out)); } -static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) +void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) { atomic_inc(&cinfo->rpcs_out); } diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index f6a2fd3015e7..7c441f2bd444 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -8,6 +8,7 @@ config NFSD select SUNRPC select EXPORTFS select NFS_ACL_SUPPORT if NFSD_V2_ACL + select NFS_ACL_SUPPORT if NFSD_V3_ACL depends on MULTIUSER help Choose Y here if you want to allow other computers to access @@ -26,19 +27,29 @@ config NFSD Below you can choose which versions of the NFS protocol are available to clients mounting the NFS server on this system. - Support for NFS version 2 (RFC 1094) is always available when + Support for NFS version 3 (RFC 1813) is always available when CONFIG_NFSD is selected. If unsure, say N. -config NFSD_V2_ACL - bool +config NFSD_V2 + bool "NFS server support for NFS version 2 (DEPRECATED)" depends on NFSD + default n + help + NFSv2 (RFC 1094) was the first publicly-released version of NFS. + Unless you are hosting ancient (1990's era) NFS clients, you don't + need this. + + If unsure, say N. + +config NFSD_V2_ACL + bool "NFS server support for the NFSv2 ACL protocol extension" + depends on NFSD_V2 config NFSD_V3_ACL bool "NFS server support for the NFSv3 ACL protocol extension" depends on NFSD - select NFSD_V2_ACL help Solaris NFS servers support an auxiliary NFSv3 ACL protocol that never became an official part of the NFS version 3 protocol. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 805c06d5f1b4..6fffc8f03f74 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -10,9 +10,10 @@ obj-$(CONFIG_NFSD) += nfsd.o # this one should be compiled first, as the tracing macros can easily blow up nfsd-y += trace.o -nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ - export.o auth.o lockd.o nfscache.o nfsxdr.o \ +nfsd-y += nfssvc.o nfsctl.o nfsfh.o vfs.o \ + export.o auth.o lockd.o nfscache.o \ stats.o filecache.o nfs3proc.o nfs3xdr.o +nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index e7e6e78d965d..01d7fd108cf3 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -12,6 +12,7 @@ #include "blocklayoutxdr.h" #include "pnfs.h" #include "filecache.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 2455dc8be18a..1ed2f691ebb9 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -9,6 +9,7 @@ #include "nfsd.h" #include "blocklayoutxdr.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index ee0e3aba4a6e..d03f7f6a8642 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -115,7 +115,6 @@ struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); int exp_rootfh(struct net *, struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); -__be32 nfserrno(int errno); static inline void exp_put(struct svc_export *exp) { diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 5cb8cce153a5..ee9c923192e0 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1,7 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Open file cache. + * The NFSD open file cache. * * (c) 2015 - Jeff Layton + * + * An nfsd_file object is a per-file collection of open state that binds + * together: + * - a struct file * + * - a user credential + * - a network namespace + * - a read-ahead context + * - monitoring for writeback errors + * + * nfsd_file objects are reference-counted. Consumers acquire a new + * object via the nfsd_file_acquire API. They manage their interest in + * the acquired object, and hence the object's reference count, via + * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file + * object: + * + * * non-garbage-collected: When a consumer wants to precisely control + * the lifetime of a file's open state, it acquires a non-garbage- + * collected nfsd_file. The final nfsd_file_put releases the open + * state immediately. + * + * * garbage-collected: When a consumer does not control the lifetime + * of open state, it acquires a garbage-collected nfsd_file. The + * final nfsd_file_put allows the open state to linger for a period + * during which it may be re-used. */ #include @@ -49,70 +74,9 @@ static struct list_lru nfsd_file_lru; static unsigned long nfsd_file_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; static struct delayed_work nfsd_filecache_laundrette; -static struct rhashtable nfsd_file_rhash_tbl +static struct rhltable nfsd_file_rhltable ____cacheline_aligned_in_smp; -enum nfsd_file_lookup_type { - NFSD_FILE_KEY_INODE, - NFSD_FILE_KEY_FULL, -}; - -struct nfsd_file_lookup_key { - struct inode *inode; - struct net *net; - const struct cred *cred; - unsigned char need; - bool gc; - enum nfsd_file_lookup_type type; -}; - -/* - * The returned hash value is based solely on the address of an in-code - * inode, a pointer to a slab-allocated object. The entropy in such a - * pointer is concentrated in its middle bits. - */ -static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed) -{ - unsigned long ptr = (unsigned long)inode; - u32 k; - - k = ptr >> L1_CACHE_SHIFT; - k &= 0x00ffffff; - return jhash2(&k, 1, seed); -} - -/** - * nfsd_file_key_hashfn - Compute the hash value of a lookup key - * @data: key on which to compute the hash value - * @len: rhash table's key_len parameter (unused) - * @seed: rhash table's random seed of the day - * - * Return value: - * Computed 32-bit hash value - */ -static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed) -{ - const struct nfsd_file_lookup_key *key = data; - - return nfsd_file_inode_hash(key->inode, seed); -} - -/** - * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file - * @data: object on which to compute the hash value - * @len: rhash table's key_len parameter (unused) - * @seed: rhash table's random seed of the day - * - * Return value: - * Computed 32-bit hash value - */ -static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed) -{ - const struct nfsd_file *nf = data; - - return nfsd_file_inode_hash(nf->nf_inode, seed); -} - static bool nfsd_match_cred(const struct cred *c1, const struct cred *c2) { @@ -133,65 +97,25 @@ nfsd_match_cred(const struct cred *c1, const struct cred *c2) return true; } -/** - * nfsd_file_obj_cmpfn - Match a cache item against search criteria - * @arg: search criteria - * @ptr: cache item to check - * - * Return values: - * %0 - Item matches search criteria - * %1 - Item does not match search criteria - */ -static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, - const void *ptr) -{ - const struct nfsd_file_lookup_key *key = arg->key; - const struct nfsd_file *nf = ptr; - - switch (key->type) { - case NFSD_FILE_KEY_INODE: - if (nf->nf_inode != key->inode) - return 1; - break; - case NFSD_FILE_KEY_FULL: - if (nf->nf_inode != key->inode) - return 1; - if (nf->nf_may != key->need) - return 1; - if (nf->nf_net != key->net) - return 1; - if (!nfsd_match_cred(nf->nf_cred, key->cred)) - return 1; - if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) - return 1; - if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) - return 1; - break; - } - return 0; -} - static const struct rhashtable_params nfsd_file_rhash_params = { .key_len = sizeof_field(struct nfsd_file, nf_inode), .key_offset = offsetof(struct nfsd_file, nf_inode), - .head_offset = offsetof(struct nfsd_file, nf_rhash), - .hashfn = nfsd_file_key_hashfn, - .obj_hashfn = nfsd_file_obj_hashfn, - .obj_cmpfn = nfsd_file_obj_cmpfn, - /* Reduce resizing churn on light workloads */ - .min_size = 512, /* buckets */ + .head_offset = offsetof(struct nfsd_file, nf_rlist), + + /* + * Start with a single page hash table to reduce resizing churn + * on light workloads. + */ + .min_size = 256, .automatic_shrinking = true, }; static void nfsd_file_schedule_laundrette(void) { - if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) || - test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) - return; - - queue_delayed_work(system_wq, &nfsd_filecache_laundrette, - NFSD_LAUNDRETTE_DELAY); + if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags)) + queue_delayed_work(system_wq, &nfsd_filecache_laundrette, + NFSD_LAUNDRETTE_DELAY); } static void @@ -285,27 +209,27 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) } static struct nfsd_file * -nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) +nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, + bool want_gc) { struct nfsd_file *nf; nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); - if (nf) { - INIT_LIST_HEAD(&nf->nf_lru); - nf->nf_birthtime = ktime_get(); - nf->nf_file = NULL; - nf->nf_cred = get_current_cred(); - nf->nf_net = key->net; - nf->nf_flags = 0; - __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); - __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); - if (key->gc) - __set_bit(NFSD_FILE_GC, &nf->nf_flags); - nf->nf_inode = key->inode; - refcount_set(&nf->nf_ref, 1); - nf->nf_may = key->need; - nf->nf_mark = NULL; - } + if (unlikely(!nf)) + return NULL; + + INIT_LIST_HEAD(&nf->nf_lru); + nf->nf_birthtime = ktime_get(); + nf->nf_file = NULL; + nf->nf_cred = get_current_cred(); + nf->nf_net = net; + nf->nf_flags = want_gc ? + BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) : + BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING); + nf->nf_inode = inode; + refcount_set(&nf->nf_ref, 1); + nf->nf_may = need; + nf->nf_mark = NULL; return nf; } @@ -330,8 +254,8 @@ static void nfsd_file_hash_remove(struct nfsd_file *nf) { trace_nfsd_file_unhash(nf); - rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, - nfsd_file_rhash_params); + rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist, + nfsd_file_rhash_params); } static bool @@ -358,10 +282,8 @@ nfsd_file_free(struct nfsd_file *nf) if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { - get_file(nf->nf_file); - filp_close(nf->nf_file, NULL); nfsd_file_check_write_error(nf); - fput(nf->nf_file); + filp_close(nf->nf_file, NULL); } /* @@ -380,13 +302,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf) struct file *file = nf->nf_file; struct address_space *mapping; - if (!file || !(file->f_mode & FMODE_WRITE)) + /* File not open for write? */ + if (!(file->f_mode & FMODE_WRITE)) return false; + + /* + * Some filesystems (e.g. NFS) flush all dirty data on close. + * On others, there is no need to wait for writeback. + */ + if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE)) + return false; + mapping = file->f_mapping; return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } + static bool nfsd_file_lru_add(struct nfsd_file *nf) { set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); @@ -409,7 +341,7 @@ static bool nfsd_file_lru_remove(struct nfsd_file *nf) struct nfsd_file * nfsd_file_get(struct nfsd_file *nf) { - if (likely(refcount_inc_not_zero(&nf->nf_ref))) + if (nf && refcount_inc_not_zero(&nf->nf_ref)) return nf; return NULL; } @@ -470,49 +402,26 @@ nfsd_file_dispose_list(struct list_head *dispose) } } -static void -nfsd_file_list_remove_disposal(struct list_head *dst, - struct nfsd_fcache_disposal *l) -{ - spin_lock(&l->lock); - list_splice_init(&l->freeme, dst); - spin_unlock(&l->lock); -} - -static void -nfsd_file_list_add_disposal(struct list_head *files, struct net *net) -{ - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct nfsd_fcache_disposal *l = nn->fcache_disposal; - - spin_lock(&l->lock); - list_splice_tail_init(files, &l->freeme); - spin_unlock(&l->lock); - queue_work(nfsd_filecache_wq, &l->work); -} - -static void -nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, - struct net *net) -{ - struct nfsd_file *nf, *tmp; - - list_for_each_entry_safe(nf, tmp, src, nf_lru) { - if (nf->nf_net == net) - list_move_tail(&nf->nf_lru, dst); - } -} - +/** + * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list + * @dispose: list of nfsd_files to be disposed + * + * Transfers each file to the "freeme" list for its nfsd_net, to eventually + * be disposed of by the per-net garbage collector. + */ static void nfsd_file_dispose_list_delayed(struct list_head *dispose) { - LIST_HEAD(list); - struct nfsd_file *nf; - while(!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); - nfsd_file_list_add_disposal(&list, nf->nf_net); + struct nfsd_file *nf = list_first_entry(dispose, + struct nfsd_file, nf_lru); + struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); + struct nfsd_fcache_disposal *l = nn->fcache_disposal; + + spin_lock(&l->lock); + list_move_tail(&nf->nf_lru, &l->freeme); + spin_unlock(&l->lock); + queue_work(nfsd_filecache_wq, &l->work); } } @@ -589,7 +498,8 @@ static void nfsd_file_gc_worker(struct work_struct *work) { nfsd_file_gc(); - nfsd_file_schedule_laundrette(); + if (list_lru_count(&nfsd_file_lru)) + nfsd_file_schedule_laundrette(); } static unsigned long @@ -655,8 +565,8 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) * @inode: inode on which to close out nfsd_files * @dispose: list on which to gather nfsd_files to close out * - * An nfsd_file represents a struct file being held open on behalf of nfsd. An - * open file however can block other activity (such as leases), or cause + * An nfsd_file represents a struct file being held open on behalf of nfsd. + * An open file however can block other activity (such as leases), or cause * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). * * This function is intended to find open nfsd_files when this sort of @@ -669,20 +579,17 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) static void nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) { - struct nfsd_file_lookup_key key = { - .type = NFSD_FILE_KEY_INODE, - .inode = inode, - }; + struct rhlist_head *tmp, *list; struct nfsd_file *nf; rcu_read_lock(); - do { - nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, - nfsd_file_rhash_params); - if (!nf) - break; + list = rhltable_lookup(&nfsd_file_rhltable, &inode, + nfsd_file_rhash_params); + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { + if (!test_bit(NFSD_FILE_GC, &nf->nf_flags)) + continue; nfsd_file_cond_queue(nf, dispose); - } while (1); + } rcu_read_unlock(); } @@ -735,8 +642,8 @@ nfsd_file_close_inode_sync(struct inode *inode) * nfsd_file_delayed_close - close unused nfsd_files * @work: dummy * - * Walk the LRU list and destroy any entries that have not been used since - * the last scan. + * Scrape the freeme list for this nfsd_net, and then dispose of them + * all. */ static void nfsd_file_delayed_close(struct work_struct *work) @@ -745,7 +652,10 @@ nfsd_file_delayed_close(struct work_struct *work) struct nfsd_fcache_disposal *l = container_of(work, struct nfsd_fcache_disposal, work); - nfsd_file_list_remove_disposal(&head, l); + spin_lock(&l->lock); + list_splice_init(&l->freeme, &head); + spin_unlock(&l->lock); + nfsd_file_dispose_list(&head); } @@ -806,7 +716,7 @@ nfsd_file_cache_init(void) if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) return 0; - ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params); + ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params); if (ret) return ret; @@ -874,7 +784,7 @@ out_err: nfsd_file_mark_slab = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; - rhashtable_destroy(&nfsd_file_rhash_tbl); + rhltable_destroy(&nfsd_file_rhltable); goto out; } @@ -883,7 +793,8 @@ out_err: * @net: net-namespace to shut down the cache (may be NULL) * * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, - * then close out everything. Called when an nfsd instance is being shut down. + * then close out everything. Called when an nfsd instance is being shut down, + * and when the exports table is flushed. */ static void __nfsd_file_cache_purge(struct net *net) @@ -892,7 +803,7 @@ __nfsd_file_cache_purge(struct net *net) struct nfsd_file *nf; LIST_HEAD(dispose); - rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter); + rhltable_walk_enter(&nfsd_file_rhltable, &iter); do { rhashtable_walk_start(&iter); @@ -998,7 +909,7 @@ nfsd_file_cache_shutdown(void) nfsd_file_mark_slab = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; - rhashtable_destroy(&nfsd_file_rhash_tbl); + rhltable_destroy(&nfsd_file_rhltable); for_each_possible_cpu(i) { per_cpu(nfsd_file_cache_hits, i) = 0; @@ -1009,6 +920,35 @@ nfsd_file_cache_shutdown(void) } } +static struct nfsd_file * +nfsd_file_lookup_locked(const struct net *net, const struct cred *cred, + struct inode *inode, unsigned char need, + bool want_gc) +{ + struct rhlist_head *tmp, *list; + struct nfsd_file *nf; + + list = rhltable_lookup(&nfsd_file_rhltable, &inode, + nfsd_file_rhash_params); + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { + if (nf->nf_may != need) + continue; + if (nf->nf_net != net) + continue; + if (!nfsd_match_cred(nf->nf_cred, cred)) + continue; + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc) + continue; + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) + continue; + + if (!nfsd_file_get(nf)) + continue; + return nf; + } + return NULL; +} + /** * nfsd_file_is_cached - are there any cached open files for this inode? * @inode: inode to check @@ -1023,15 +963,20 @@ nfsd_file_cache_shutdown(void) bool nfsd_file_is_cached(struct inode *inode) { - struct nfsd_file_lookup_key key = { - .type = NFSD_FILE_KEY_INODE, - .inode = inode, - }; + struct rhlist_head *tmp, *list; + struct nfsd_file *nf; bool ret = false; - if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, - nfsd_file_rhash_params) != NULL) - ret = true; + rcu_read_lock(); + list = rhltable_lookup(&nfsd_file_rhltable, &inode, + nfsd_file_rhash_params); + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) + if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) { + ret = true; + break; + } + rcu_read_unlock(); + trace_nfsd_file_is_cached(inode, (int)ret); return ret; } @@ -1041,14 +986,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct file *file, struct nfsd_file **pnf, bool want_gc) { - struct nfsd_file_lookup_key key = { - .type = NFSD_FILE_KEY_FULL, - .need = may_flags & NFSD_FILE_MAY_MASK, - .net = SVC_NET(rqstp), - .gc = want_gc, - }; + unsigned char need = may_flags & NFSD_FILE_MAY_MASK; + struct net *net = SVC_NET(rqstp); + struct nfsd_file *new, *nf; + const struct cred *cred; bool open_retry = true; - struct nfsd_file *nf; + struct inode *inode; __be32 status; int ret; @@ -1056,81 +999,88 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, may_flags|NFSD_MAY_OWNER_OVERRIDE); if (status != nfs_ok) return status; - key.inode = d_inode(fhp->fh_dentry); - key.cred = get_current_cred(); + inode = d_inode(fhp->fh_dentry); + cred = get_current_cred(); retry: rcu_read_lock(); - nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, - nfsd_file_rhash_params); - if (nf) - nf = nfsd_file_get(nf); + nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); rcu_read_unlock(); if (nf) { + /* + * If the nf is on the LRU then it holds an extra reference + * that must be put if it's removed. It had better not be + * the last one however, since we should hold another. + */ if (nfsd_file_lru_remove(nf)) WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); goto wait_for_construction; } - nf = nfsd_file_alloc(&key, may_flags); - if (!nf) { + new = nfsd_file_alloc(net, inode, need, want_gc); + if (!new) { status = nfserr_jukebox; - goto out_status; + goto out; } - ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl, - &key, &nf->nf_rhash, - nfsd_file_rhash_params); + rcu_read_lock(); + spin_lock(&inode->i_lock); + nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); + if (unlikely(nf)) { + spin_unlock(&inode->i_lock); + rcu_read_unlock(); + nfsd_file_slab_free(&new->nf_rcu); + goto wait_for_construction; + } + nf = new; + ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist, + nfsd_file_rhash_params); + spin_unlock(&inode->i_lock); + rcu_read_unlock(); if (likely(ret == 0)) goto open_file; - nfsd_file_slab_free(&nf->nf_rcu); - nf = NULL; if (ret == -EEXIST) goto retry; - trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret); + trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret); status = nfserr_jukebox; - goto out_status; + goto construction_err; wait_for_construction: wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); /* Did construction of this file fail? */ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); + trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf); if (!open_retry) { status = nfserr_jukebox; - goto out; + goto construction_err; } open_retry = false; - if (refcount_dec_and_test(&nf->nf_ref)) - nfsd_file_free(nf); goto retry; } - this_cpu_inc(nfsd_file_cache_hits); status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); + if (status != nfs_ok) { + nfsd_file_put(nf); + nf = NULL; + } + out: if (status == nfs_ok) { this_cpu_inc(nfsd_file_acquisitions); nfsd_file_check_write_error(nf); *pnf = nf; - } else { - if (refcount_dec_and_test(&nf->nf_ref)) - nfsd_file_free(nf); - nf = NULL; } - -out_status: - put_cred(key.cred); - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); + put_cred(cred); + trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status); return status; open_file: trace_nfsd_file_alloc(nf); - nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); + nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode); if (nf->nf_mark) { if (file) { get_file(file); @@ -1148,13 +1098,16 @@ open_file: * If construction failed, or we raced with a call to unlink() * then unhash. */ - if (status == nfs_ok && key.inode->i_nlink == 0) - status = nfserr_jukebox; - if (status != nfs_ok) + if (status != nfs_ok || inode->i_nlink == 0) nfsd_file_unhash(nf); - clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); - smp_mb__after_atomic(); - wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); + clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); + if (status == nfs_ok) + goto out; + +construction_err: + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); + nf = NULL; goto out; } @@ -1170,8 +1123,11 @@ open_file: * seconds after the final nfsd_file_put() in case the caller * wants to re-use it. * - * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in - * network byte order is returned. + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. */ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -1191,8 +1147,11 @@ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, * but not garbage-collected. The object is unhashed after the * final nfsd_file_put(). * - * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in - * network byte order is returned. + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. */ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -1213,8 +1172,11 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, * and @file is non-NULL, use it to instantiate a new nfsd_file instead of * opening a new one. * - * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in - * network byte order is returned. + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. */ __be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -1245,7 +1207,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) lru = list_lru_count(&nfsd_file_lru); rcu_read_lock(); - ht = &nfsd_file_rhash_tbl; + ht = &nfsd_file_rhltable.ht; count = atomic_read(&ht->nelems); tbl = rht_dereference_rcu(ht->tbl, ht); buckets = tbl->size; @@ -1261,7 +1223,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) evictions += per_cpu(nfsd_file_evictions, i); } - seq_printf(m, "total entries: %u\n", count); + seq_printf(m, "total inodes: %u\n", count); seq_printf(m, "hash buckets: %u\n", buckets); seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 41516a4263ea..e54165a3224f 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -29,9 +29,8 @@ struct nfsd_file_mark { * never be dereferenced, only used for comparison. */ struct nfsd_file { - struct rhash_head nf_rhash; - struct list_head nf_lru; - struct rcu_head nf_rcu; + struct rhlist_head nf_rlist; + void *nf_inode; struct file *nf_file; const struct cred *nf_cred; struct net *nf_net; @@ -40,10 +39,12 @@ struct nfsd_file { #define NFSD_FILE_REFERENCED (2) #define NFSD_FILE_GC (3) unsigned long nf_flags; - struct inode *nf_inode; /* don't deref */ refcount_t nf_ref; unsigned char nf_may; + struct nfsd_file_mark *nf_mark; + struct list_head nf_lru; + struct rcu_head nf_rcu; ktime_t nf_birthtime; }; diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c index 070f90ed09b6..3ca5304440ff 100644 --- a/fs/nfsd/flexfilelayout.c +++ b/fs/nfsd/flexfilelayout.c @@ -15,6 +15,7 @@ #include "flexfilelayoutxdr.h" #include "pnfs.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 8c854ba3285b..51a4b7885cae 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -195,7 +195,7 @@ struct nfsd_net { atomic_t nfsd_courtesy_clients; struct shrinker nfsd_client_shrinker; - struct delayed_work nfsd_shrinker_work; + struct work_struct nfsd_shrinker_work; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 39989c14c8a1..4eae2c5af2ed 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -76,6 +76,17 @@ static __be32 *xdr_encode_empty_array(__be32 *p) * 1 Protocol" */ +static void encode_uint32(struct xdr_stream *xdr, u32 n) +{ + WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0); +} + +static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap, + size_t len) +{ + WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0); +} + /* * nfs_cb_opnum4 * @@ -328,6 +339,24 @@ static void encode_cb_recall4args(struct xdr_stream *xdr, hdr->nops++; } +/* + * CB_RECALLANY4args + * + * struct CB_RECALLANY4args { + * uint32_t craa_objects_to_keep; + * bitmap4 craa_type_mask; + * }; + */ +static void +encode_cb_recallany4args(struct xdr_stream *xdr, + struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra) +{ + encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY); + encode_uint32(xdr, ra->ra_keep); + encode_bitmap4(xdr, ra->ra_bmval, ARRAY_SIZE(ra->ra_bmval)); + hdr->nops++; +} + /* * CB_SEQUENCE4args * @@ -482,6 +511,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, encode_cb_nops(&hdr); } +/* + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects + */ +static void +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfsd4_callback *cb = data; + struct nfsd4_cb_recall_any *ra; + struct nfs4_cb_compound_hdr hdr = { + .ident = cb->cb_clp->cl_cb_ident, + .minorversion = cb->cb_clp->cl_minorversion, + }; + + ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb); + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + encode_cb_recallany4args(xdr, &hdr, ra); + encode_cb_nops(&hdr); +} /* * NFSv4.0 and NFSv4.1 XDR decode functions @@ -520,6 +569,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status); } +/* + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects + */ +static int +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfsd4_callback *cb = data; + struct nfs4_cb_compound_hdr hdr; + int status; + + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) + return status; + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status || cb->cb_seq_status)) + return status; + status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status); + return status; +} + #ifdef CONFIG_NFSD_PNFS /* * CB_LAYOUTRECALL4args @@ -783,6 +854,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = { #endif PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), + PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any), }; static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index e70a1a2999b7..5e9809aff37e 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -41,6 +41,7 @@ #include "idmap.h" #include "nfsd.h" #include "netns.h" +#include "vfs.h" /* * Turn off idmapping when using AUTH_SYS. diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a9105e95b59c..ba53cd89ec62 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -943,12 +943,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &read->rd_stateid, RD_STATE, &read->rd_nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); - goto out; - } - status = nfs_ok; -out: + read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -1117,10 +1112,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &setattr->sa_stateid, WR_STATE, NULL, NULL); - if (status) { - dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); + if (status) return status; - } } err = fh_want_write(&cstate->current_fh); if (err) @@ -1170,10 +1163,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_offset, cnt); status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, stateid, WR_STATE, &nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); + if (status) return status; - } write->wr_how_written = write->wr_stable_how; @@ -1204,17 +1195,13 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, src_stateid, RD_STATE, src, NULL); - if (status) { - dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); + if (status) goto out; - } status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, dst_stateid, WR_STATE, dst, NULL); - if (status) { - dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); + if (status) goto out_put_src; - } /* fix up for NFS-specific error code */ if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) || @@ -1935,10 +1922,8 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &fallocate->falloc_stateid, WR_STATE, &nf, NULL); - if (status != nfs_ok) { - dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); + if (status != nfs_ok) return status; - } status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file, fallocate->falloc_offset, @@ -1994,10 +1979,8 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &seek->seek_stateid, RD_STATE, &nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); + if (status) return status; - } switch (seek->seek_whence) { case NFS4_CONTENT_DATA: diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index faecdbfa01a2..e4522e86e984 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -44,7 +44,9 @@ #include #include #include +#include #include + #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -84,6 +86,7 @@ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) static void nfs4_free_ol_stateid(struct nfs4_stid *stid); void nfsd4_end_grace(struct nfsd_net *nn); static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); +static void nfsd4_file_hash_remove(struct nfs4_file *fi); /* Locking: */ @@ -588,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu) void put_nfs4_file(struct nfs4_file *fi) { - might_lock(&state_lock); - - if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { - hlist_del_rcu(&fi->fi_hash); - spin_unlock(&state_lock); + if (refcount_dec_and_test(&fi->fi_ref)) { + nfsd4_file_hash_remove(fi); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); @@ -602,9 +602,7 @@ put_nfs4_file(struct nfs4_file *fi) static struct nfsd_file * __nfs4_get_fd(struct nfs4_file *f, int oflag) { - if (f->fi_fds[oflag]) - return nfsd_file_get(f->fi_fds[oflag]); - return NULL; + return nfsd_file_get(f->fi_fds[oflag]); } static struct nfsd_file * @@ -717,19 +715,20 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) return ret & OWNER_HASH_MASK; } -/* hash table for nfs4_file */ -#define FILE_HASH_BITS 8 -#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) +static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp; -static unsigned int file_hashval(struct svc_fh *fh) -{ - struct inode *inode = d_inode(fh->fh_dentry); +static const struct rhashtable_params nfs4_file_rhash_params = { + .key_len = sizeof_field(struct nfs4_file, fi_inode), + .key_offset = offsetof(struct nfs4_file, fi_inode), + .head_offset = offsetof(struct nfs4_file, fi_rlist), - /* XXX: why not (here & in file cache) use inode? */ - return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS); -} - -static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; + /* + * Start with a single page hash table to reduce resizing churn + * on light workloads. + */ + .min_size = 256, + .automatic_shrinking = true, +}; /* * Check if courtesy clients have conflicting access and resolve it if possible @@ -1367,6 +1366,8 @@ static void revoke_delegation(struct nfs4_delegation *dp) WARN_ON(!list_empty(&dp->dl_recall_lru)); + trace_nfsd_stid_revoke(&dp->dl_stid); + if (clp->cl_minorversion) { spin_lock(&clp->cl_lock); dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; @@ -1831,13 +1832,12 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, int numslots = fattrs->maxreqs; int slotsize = slot_bytes(fattrs); struct nfsd4_session *new; - int mem, i; + int i; - BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *) - + sizeof(struct nfsd4_session) > PAGE_SIZE); - mem = numslots * sizeof(struct nfsd4_slot *); + BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION) + > PAGE_SIZE); - new = kzalloc(sizeof(*new) + mem, GFP_KERNEL); + new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL); if (!new) return NULL; /* allocate each struct nfsd4_slot and data cache in one piece */ @@ -2143,6 +2143,7 @@ static void __free_client(struct kref *k) kfree(clp->cl_nii_domain.data); kfree(clp->cl_nii_name.data); idr_destroy(&clp->cl_stateids); + kfree(clp->cl_ra); kmem_cache_free(client_slab, clp); } @@ -2870,6 +2871,37 @@ static const struct tree_descr client_files[] = { [3] = {""}, }; +static int +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb, + struct rpc_task *task) +{ + trace_nfsd_cb_recall_any_done(cb, task); + switch (task->tk_status) { + case -NFS4ERR_DELAY: + rpc_delay(task, 2 * HZ); + return 0; + default: + return 1; + } +} + +static void +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) +{ + struct nfs4_client *clp = cb->cb_clp; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); + put_client_renew_locked(clp); + spin_unlock(&nn->client_lock); +} + +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { + .done = nfsd4_cb_recall_any_done, + .release = nfsd4_cb_recall_any_release, +}; + static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { @@ -2907,6 +2939,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name, free_client(clp); return NULL; } + clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL); + if (!clp->cl_ra) { + free_client(clp); + return NULL; + } + clp->cl_ra_time = 0; + nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops, + NFSPROC4_CLNT_CB_RECALL_ANY); return clp; } @@ -4276,11 +4316,9 @@ static struct nfs4_file *nfsd4_alloc_file(void) } /* OPEN Share state helper functions */ -static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval, - struct nfs4_file *fp) -{ - lockdep_assert_held(&state_lock); +static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp) +{ refcount_set(&fp->fi_ref, 1); spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); @@ -4298,7 +4336,6 @@ static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval, INIT_LIST_HEAD(&fp->fi_lo_states); atomic_set(&fp->fi_lo_recalls, 0); #endif - hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); } void @@ -4363,25 +4400,27 @@ out: } static unsigned long -nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc) +nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { - int cnt; + int count; struct nfsd_net *nn = container_of(shrink, struct nfsd_net, nfsd_client_shrinker); - cnt = atomic_read(&nn->nfsd_courtesy_clients); - if (cnt > 0) - mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); - return (unsigned long)cnt; + count = atomic_read(&nn->nfsd_courtesy_clients); + if (!count) + count = atomic_long_read(&num_delegations); + if (count) + queue_work(laundry_wq, &nn->nfsd_shrinker_work); + return (unsigned long)count; } static unsigned long -nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc) +nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) { return SHRINK_STOP; } -int +void nfsd4_init_leases_net(struct nfsd_net *nn) { struct sysinfo si; @@ -4403,16 +4442,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn) nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); atomic_set(&nn->nfsd_courtesy_clients, 0); - nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan; - nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count; - nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); -} - -void -nfsd4_leases_net_shutdown(struct nfsd_net *nn) -{ - unregister_shrinker(&nn->nfsd_client_shrinker); } static void init_nfs4_replay(struct nfs4_replay *rp) @@ -4683,71 +4712,80 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) nfs4_put_stid(&last->st_stid); } -/* search file_hashtbl[] for file */ -static struct nfs4_file * -find_file_locked(struct svc_fh *fh, unsigned int hashval) +static noinline_for_stack struct nfs4_file * +nfsd4_file_hash_lookup(const struct svc_fh *fhp) { - struct nfs4_file *fp; + struct inode *inode = d_inode(fhp->fh_dentry); + struct rhlist_head *tmp, *list; + struct nfs4_file *fi; - hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, - lockdep_is_held(&state_lock)) { - if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { - if (refcount_inc_not_zero(&fp->fi_ref)) - return fp; + rcu_read_lock(); + list = rhltable_lookup(&nfs4_file_rhltable, &inode, + nfs4_file_rhash_params); + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { + if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { + if (refcount_inc_not_zero(&fi->fi_ref)) { + rcu_read_unlock(); + return fi; + } } } + rcu_read_unlock(); return NULL; } -static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh, - unsigned int hashval) +/* + * On hash insertion, identify entries with the same inode but + * distinct filehandles. They will all be on the list returned + * by rhltable_lookup(). + * + * inode->i_lock prevents racing insertions from adding an entry + * for the same inode/fhp pair twice. + */ +static noinline_for_stack struct nfs4_file * +nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp) { - struct nfs4_file *fp; + struct inode *inode = d_inode(fhp->fh_dentry); + struct rhlist_head *tmp, *list; struct nfs4_file *ret = NULL; bool alias_found = false; + struct nfs4_file *fi; + int err; - spin_lock(&state_lock); - hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, - lockdep_is_held(&state_lock)) { - if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { - if (refcount_inc_not_zero(&fp->fi_ref)) - ret = fp; - } else if (d_inode(fh->fh_dentry) == fp->fi_inode) - fp->fi_aliased = alias_found = true; + rcu_read_lock(); + spin_lock(&inode->i_lock); + + list = rhltable_lookup(&nfs4_file_rhltable, &inode, + nfs4_file_rhash_params); + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { + if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { + if (refcount_inc_not_zero(&fi->fi_ref)) + ret = fi; + } else + fi->fi_aliased = alias_found = true; } - if (likely(ret == NULL)) { - nfsd4_init_file(fh, hashval, new); - new->fi_aliased = alias_found; - ret = new; - } - spin_unlock(&state_lock); + if (ret) + goto out_unlock; + + nfsd4_file_init(fhp, new); + err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist, + nfs4_file_rhash_params); + if (err) + goto out_unlock; + + new->fi_aliased = alias_found; + ret = new; + +out_unlock: + spin_unlock(&inode->i_lock); + rcu_read_unlock(); return ret; } -static struct nfs4_file * find_file(struct svc_fh *fh) +static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) { - struct nfs4_file *fp; - unsigned int hashval = file_hashval(fh); - - rcu_read_lock(); - fp = find_file_locked(fh, hashval); - rcu_read_unlock(); - return fp; -} - -static struct nfs4_file * -find_or_add_file(struct nfs4_file *new, struct svc_fh *fh) -{ - struct nfs4_file *fp; - unsigned int hashval = file_hashval(fh); - - rcu_read_lock(); - fp = find_file_locked(fh, hashval); - rcu_read_unlock(); - if (fp) - return fp; - - return insert_file(new, fh, hashval); + rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist, + nfs4_file_rhash_params); } /* @@ -4760,9 +4798,10 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) struct nfs4_file *fp; __be32 ret = nfs_ok; - fp = find_file(current_fh); + fp = nfsd4_file_hash_lookup(current_fh); if (!fp) return ret; + /* Check for conflicting share reservations */ spin_lock(&fp->fi_lock); if (fp->fi_share_deny & deny_type) @@ -4774,7 +4813,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) static bool nfsd4_deleg_present(const struct inode *inode) { - struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx); + struct file_lock_context *ctx = locks_inode_context(inode); return ctx && !list_empty_careful(&ctx->flc_lease); } @@ -4908,10 +4947,8 @@ nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&fp->fi_lock); fp->fi_had_conflict = true; nfsd_break_one_deleg(dp); - spin_unlock(&fp->fi_lock); return false; } @@ -5499,12 +5536,13 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (status) goto out_unlock; + status = -EAGAIN; + if (fp->fi_had_conflict) + goto out_unlock; + spin_lock(&state_lock); spin_lock(&fp->fi_lock); - if (fp->fi_had_conflict) - status = -EAGAIN; - else - status = hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); @@ -5656,7 +5694,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * and check for delegations in the process of being recalled. * If not found, create the nfs4_file struct */ - fp = find_or_add_file(open->op_file, current_fh); + fp = nfsd4_file_hash_insert(open->op_file, current_fh); + if (unlikely(!fp)) + return nfserr_jukebox; if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) @@ -5933,7 +5973,7 @@ nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo) list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { nf = stp->st_stid.sc_file; - ctx = nf->fi_inode->i_flctx; + ctx = locks_inode_context(nf->fi_inode); if (!ctx) continue; if (locks_owner_has_blockers(ctx, lo)) @@ -6161,17 +6201,63 @@ laundromat_main(struct work_struct *laundry) } static void -courtesy_client_reaper(struct work_struct *reaper) +courtesy_client_reaper(struct nfsd_net *nn) { struct list_head reaplist; - struct delayed_work *dwork = to_delayed_work(reaper); - struct nfsd_net *nn = container_of(dwork, struct nfsd_net, - nfsd_shrinker_work); nfs4_get_courtesy_client_reaplist(nn, &reaplist); nfs4_process_client_reaplist(&reaplist); } +static void +deleg_reaper(struct nfsd_net *nn) +{ + struct list_head *pos, *next; + struct nfs4_client *clp; + struct list_head cblist; + + INIT_LIST_HEAD(&cblist); + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (clp->cl_state != NFSD4_ACTIVE || + list_empty(&clp->cl_delegations) || + atomic_read(&clp->cl_delegs_in_recall) || + test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) || + (ktime_get_boottime_seconds() - + clp->cl_ra_time < 5)) { + continue; + } + list_add(&clp->cl_ra_cblist, &cblist); + + /* release in nfsd4_cb_recall_any_release */ + atomic_inc(&clp->cl_rpc_users); + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); + clp->cl_ra_time = ktime_get_boottime_seconds(); + } + spin_unlock(&nn->client_lock); + + while (!list_empty(&cblist)) { + clp = list_first_entry(&cblist, struct nfs4_client, + cl_ra_cblist); + list_del_init(&clp->cl_ra_cblist); + clp->cl_ra->ra_keep = 0; + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG); + trace_nfsd_cb_recall_any(clp->cl_ra); + nfsd4_run_cb(&clp->cl_ra->ra_cb); + } +} + +static void +nfsd4_state_shrinker_worker(struct work_struct *work) +{ + struct nfsd_net *nn = container_of(work, struct nfsd_net, + nfsd_shrinker_work); + + courtesy_client_reaper(nn); + deleg_reaper(nn); +} + static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) @@ -6936,6 +7022,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto put_stateid; + trace_nfsd_deleg_return(stateid); wake_up_var(d_inode(cstate->current_fh.fh_dentry)); destroy_delegation(dp); put_stateid: @@ -7736,18 +7823,20 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { struct file_lock *fl; int status = false; - struct nfsd_file *nf = find_any_file(fp); + struct nfsd_file *nf; struct inode *inode; struct file_lock_context *flctx; + spin_lock(&fp->fi_lock); + nf = find_any_file_locked(fp); if (!nf) { /* Any valid lock stateid should have some sort of access */ WARN_ON_ONCE(1); - return status; + goto out; } inode = locks_inode(nf->nf_file); - flctx = inode->i_flctx; + flctx = locks_inode_context(inode); if (flctx && !list_empty_careful(&flctx->flc_posix)) { spin_lock(&flctx->flc_lock); @@ -7759,7 +7848,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } spin_unlock(&flctx->flc_lock); } - nfsd_file_put(nf); +out: + spin_unlock(&fp->fi_lock); return status; } @@ -7769,10 +7859,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) * @cstate: NFSv4 COMPOUND state * @u: RELEASE_LOCKOWNER arguments * - * The lockowner's so_count is bumped when a lock record is added - * or when copying a conflicting lock. The latter case is brief, - * but can lead to fleeting false positives when looking for - * locks-in-use. + * Check if theree are any locks still held and if not - free the lockowner + * and any lock state that is owned. * * Return values: * %nfs_ok: lockowner released or not found @@ -7808,10 +7896,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, spin_unlock(&clp->cl_lock); return nfs_ok; } - if (atomic_read(&lo->lo_owner.so_count) != 2) { - spin_unlock(&clp->cl_lock); - nfs4_put_stateowner(&lo->lo_owner); - return nfserr_locks_held; + + list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { + if (check_for_locks(stp->st_stid.sc_file, lo)) { + spin_unlock(&clp->cl_lock); + nfs4_put_stateowner(&lo->lo_owner); + return nfserr_locks_held; + } } unhash_lockowner_locked(lo); while (!list_empty(&lo->lo_owner.so_stateids)) { @@ -7992,11 +8083,20 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper); + INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); + nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; + + if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client")) + goto err_shrinker; return 0; +err_shrinker: + put_net(net); + kfree(nn->sessionid_hashtbl); err_sessionid: kfree(nn->unconf_id_hashtbl); err_unconf_id: @@ -8068,10 +8168,16 @@ nfs4_state_start(void) { int ret; - ret = nfsd4_create_callback_queue(); + ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params); if (ret) return ret; + ret = nfsd4_create_callback_queue(); + if (ret) { + rhltable_destroy(&nfs4_file_rhltable); + return ret; + } + set_max_delegations(); return 0; } @@ -8083,6 +8189,8 @@ nfs4_state_shutdown_net(struct net *net) struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unregister_shrinker(&nn->nfsd_client_shrinker); + cancel_work(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); @@ -8111,6 +8219,7 @@ void nfs4_state_shutdown(void) { nfsd4_destroy_callback_queue(); + rhltable_destroy(&nfs4_file_rhltable); } static void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 89a579be042e..4ed9fef14adc 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -770,16 +770,18 @@ nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) static __be32 nfsd4_decode_access(struct nfsd4_compoundargs *argp, - struct nfsd4_access *access) + union nfsd4_op_u *u) { + struct nfsd4_access *access = &u->access; if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0) return nfserr_bad_xdr; return nfs_ok; } static __be32 -nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) +nfsd4_decode_close(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_close *close = &u->close; if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0) return nfserr_bad_xdr; return nfsd4_decode_stateid4(argp, &close->cl_stateid); @@ -787,8 +789,9 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) static __be32 -nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit) +nfsd4_decode_commit(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_commit *commit = &u->commit; if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0) return nfserr_bad_xdr; if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0) @@ -798,8 +801,9 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit } static __be32 -nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create) +nfsd4_decode_create(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_create *create = &u->create; __be32 *p, status; memset(create, 0, sizeof(*create)); @@ -844,22 +848,25 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create } static inline __be32 -nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) +nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_delegreturn *dr = &u->delegreturn; return nfsd4_decode_stateid4(argp, &dr->dr_stateid); } static inline __be32 -nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) +nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_getattr *getattr = &u->getattr; memset(getattr, 0, sizeof(*getattr)); return nfsd4_decode_bitmap4(argp, getattr->ga_bmval, ARRAY_SIZE(getattr->ga_bmval)); } static __be32 -nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) +nfsd4_decode_link(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_link *link = &u->link; memset(link, 0, sizeof(*link)); return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen); } @@ -907,8 +914,9 @@ nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) } static __be32 -nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) +nfsd4_decode_lock(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lock *lock = &u->lock; memset(lock, 0, sizeof(*lock)); if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0) return nfserr_bad_xdr; @@ -924,8 +932,9 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) } static __be32 -nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) +nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lockt *lockt = &u->lockt; memset(lockt, 0, sizeof(*lockt)); if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0) return nfserr_bad_xdr; @@ -940,8 +949,9 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) } static __be32 -nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) +nfsd4_decode_locku(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_locku *locku = &u->locku; __be32 status; if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0) @@ -962,8 +972,9 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) } static __be32 -nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup) +nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lookup *lookup = &u->lookup; return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len); } @@ -1143,8 +1154,9 @@ nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) +nfsd4_decode_open(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_open *open = &u->open; __be32 status; u32 dummy; @@ -1171,8 +1183,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) } static __be32 -nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) +nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_open_confirm *open_conf = &u->open_confirm; __be32 status; if (argp->minorversion >= 1) @@ -1190,8 +1204,10 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con } static __be32 -nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down) +nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_open_downgrade *open_down = &u->open_downgrade; __be32 status; memset(open_down, 0, sizeof(*open_down)); @@ -1209,8 +1225,9 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d } static __be32 -nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) +nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_putfh *putfh = &u->putfh; __be32 *p; if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0) @@ -1229,7 +1246,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) } static __be32 -nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { if (argp->minorversion == 0) return nfs_ok; @@ -1237,8 +1254,9 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) } static __be32 -nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) +nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_read *read = &u->read; __be32 status; memset(read, 0, sizeof(*read)); @@ -1254,8 +1272,9 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) } static __be32 -nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir) +nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_readdir *readdir = &u->readdir; __be32 status; memset(readdir, 0, sizeof(*readdir)); @@ -1276,15 +1295,17 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read } static __be32 -nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove) +nfsd4_decode_remove(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_remove *remove = &u->remove; memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo)); return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen); } static __be32 -nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename) +nfsd4_decode_rename(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_rename *rename = &u->rename; __be32 status; memset(rename, 0, sizeof(*rename)); @@ -1295,22 +1316,25 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename } static __be32 -nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) +nfsd4_decode_renew(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + clientid_t *clientid = &u->renew; return nfsd4_decode_clientid4(argp, clientid); } static __be32 nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, - struct nfsd4_secinfo *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo *secinfo = &u->secinfo; secinfo->si_exp = NULL; return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen); } static __be32 -nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) +nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_setattr *setattr = &u->setattr; __be32 status; memset(setattr, 0, sizeof(*setattr)); @@ -1324,8 +1348,9 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta } static __be32 -nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid) +nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_setclientid *setclientid = &u->setclientid; __be32 *p, status; memset(setclientid, 0, sizeof(*setclientid)); @@ -1367,8 +1392,10 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient } static __be32 -nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c) +nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_setclientid_confirm *scd_c = &u->setclientid_confirm; __be32 status; if (argp->minorversion >= 1) @@ -1382,8 +1409,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s /* Also used for NVERIFY */ static __be32 -nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) +nfsd4_decode_verify(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_verify *verify = &u->verify; __be32 *p, status; memset(verify, 0, sizeof(*verify)); @@ -1409,8 +1437,9 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify } static __be32 -nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) +nfsd4_decode_write(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_write *write = &u->write; __be32 status; status = nfsd4_decode_stateid4(argp, &write->wr_stateid); @@ -1434,8 +1463,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) } static __be32 -nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) +nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; __be32 status; if (argp->minorversion >= 1) @@ -1452,16 +1483,20 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel return nfs_ok; } -static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) +static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl; memset(bc, 0, sizeof(*bc)); if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0) return nfserr_bad_xdr; return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); } -static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) +static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; u32 use_conn_in_rdma_mode; __be32 status; @@ -1603,8 +1638,9 @@ nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, - struct nfsd4_exchange_id *exid) + union nfsd4_op_u *u) { + struct nfsd4_exchange_id *exid = &u->exchange_id; __be32 status; memset(exid, 0, sizeof(*exid)); @@ -1656,8 +1692,9 @@ nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, - struct nfsd4_create_session *sess) + union nfsd4_op_u *u) { + struct nfsd4_create_session *sess = &u->create_session; __be32 status; memset(sess, 0, sizeof(*sess)); @@ -1681,23 +1718,26 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, - struct nfsd4_destroy_session *destroy_session) + union nfsd4_op_u *u) { + struct nfsd4_destroy_session *destroy_session = &u->destroy_session; return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid); } static __be32 nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, - struct nfsd4_free_stateid *free_stateid) + union nfsd4_op_u *u) { + struct nfsd4_free_stateid *free_stateid = &u->free_stateid; return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); } #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, - struct nfsd4_getdeviceinfo *gdev) + union nfsd4_op_u *u) { + struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; __be32 status; memset(gdev, 0, sizeof(*gdev)); @@ -1717,8 +1757,9 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutcommit *lcp) + union nfsd4_op_u *u) { + struct nfsd4_layoutcommit *lcp = &u->layoutcommit; __be32 *p, status; memset(lcp, 0, sizeof(*lcp)); @@ -1753,8 +1794,9 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutget *lgp) + union nfsd4_op_u *u) { + struct nfsd4_layoutget *lgp = &u->layoutget; __be32 status; memset(lgp, 0, sizeof(*lgp)); @@ -1781,8 +1823,9 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutreturn *lrp) + union nfsd4_op_u *u) { + struct nfsd4_layoutreturn *lrp = &u->layoutreturn; memset(lrp, 0, sizeof(*lrp)); if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0) return nfserr_bad_xdr; @@ -1795,8 +1838,9 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, #endif /* CONFIG_NFSD_PNFS */ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, - struct nfsd4_secinfo_no_name *sin) + union nfsd4_op_u *u) { + struct nfsd4_secinfo_no_name *sin = &u->secinfo_no_name; if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0) return nfserr_bad_xdr; @@ -1806,8 +1850,9 @@ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, - struct nfsd4_sequence *seq) + union nfsd4_op_u *u) { + struct nfsd4_sequence *seq = &u->sequence; __be32 *p, status; status = nfsd4_decode_sessionid4(argp, &seq->sessionid); @@ -1826,8 +1871,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) +nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct nfsd4_test_stateid_id *stateid; __be32 status; u32 i; @@ -1852,14 +1899,16 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta } static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, - struct nfsd4_destroy_clientid *dc) + union nfsd4_op_u *u) { + struct nfsd4_destroy_clientid *dc = &u->destroy_clientid; return nfsd4_decode_clientid4(argp, &dc->clientid); } static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, - struct nfsd4_reclaim_complete *rc) + union nfsd4_op_u *u) { + struct nfsd4_reclaim_complete *rc = &u->reclaim_complete; if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0) return nfserr_bad_xdr; return nfs_ok; @@ -1867,8 +1916,9 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, - struct nfsd4_fallocate *fallocate) + union nfsd4_op_u *u) { + struct nfsd4_fallocate *fallocate = &u->allocate; __be32 status; status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid); @@ -1924,8 +1974,9 @@ static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) +nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_copy *copy = &u->copy; u32 consecutive, i, count, sync; struct nl4_server *ns_dummy; __be32 status; @@ -1982,8 +2033,9 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) static __be32 nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, - struct nfsd4_copy_notify *cn) + union nfsd4_op_u *u) { + struct nfsd4_copy_notify *cn = &u->copy_notify; __be32 status; memset(cn, 0, sizeof(*cn)); @@ -2002,16 +2054,18 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, - struct nfsd4_offload_status *os) + union nfsd4_op_u *u) { + struct nfsd4_offload_status *os = &u->offload_status; os->count = 0; os->status = 0; return nfsd4_decode_stateid4(argp, &os->stateid); } static __be32 -nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) +nfsd4_decode_seek(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_seek *seek = &u->seek; __be32 status; status = nfsd4_decode_stateid4(argp, &seek->seek_stateid); @@ -2028,8 +2082,9 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) } static __be32 -nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) +nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_clone *clone = &u->clone; __be32 status; status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid); @@ -2154,8 +2209,9 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) */ static __be32 nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, - struct nfsd4_getxattr *getxattr) + union nfsd4_op_u *u) { + struct nfsd4_getxattr *getxattr = &u->getxattr; __be32 status; u32 maxcount; @@ -2173,8 +2229,9 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, - struct nfsd4_setxattr *setxattr) + union nfsd4_op_u *u) { + struct nfsd4_setxattr *setxattr = &u->setxattr; u32 flags, maxcount, size; __be32 status; @@ -2214,8 +2271,9 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, - struct nfsd4_listxattrs *listxattrs) + union nfsd4_op_u *u) { + struct nfsd4_listxattrs *listxattrs = &u->listxattrs; u32 maxcount; memset(listxattrs, 0, sizeof(*listxattrs)); @@ -2245,113 +2303,114 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp, - struct nfsd4_removexattr *removexattr) + union nfsd4_op_u *u) { + struct nfsd4_removexattr *removexattr = &u->removexattr; memset(removexattr, 0, sizeof(*removexattr)); return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name); } static __be32 -nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_noop(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { return nfs_ok; } static __be32 -nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { return nfserr_notsupp; } -typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); +typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u); static const nfsd4_dec nfsd4_dec_ops[] = { - [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, - [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, - [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, - [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, - [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, - [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, - [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, - [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, - [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, - [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, - [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, - [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, - [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, - [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, - [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, - [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh, - [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_READ] = (nfsd4_dec)nfsd4_decode_read, - [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, - [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, - [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, - [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, - [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew, - [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, - [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, - [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid, - [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm, - [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, - [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, + [OP_ACCESS] = nfsd4_decode_access, + [OP_CLOSE] = nfsd4_decode_close, + [OP_COMMIT] = nfsd4_decode_commit, + [OP_CREATE] = nfsd4_decode_create, + [OP_DELEGPURGE] = nfsd4_decode_notsupp, + [OP_DELEGRETURN] = nfsd4_decode_delegreturn, + [OP_GETATTR] = nfsd4_decode_getattr, + [OP_GETFH] = nfsd4_decode_noop, + [OP_LINK] = nfsd4_decode_link, + [OP_LOCK] = nfsd4_decode_lock, + [OP_LOCKT] = nfsd4_decode_lockt, + [OP_LOCKU] = nfsd4_decode_locku, + [OP_LOOKUP] = nfsd4_decode_lookup, + [OP_LOOKUPP] = nfsd4_decode_noop, + [OP_NVERIFY] = nfsd4_decode_verify, + [OP_OPEN] = nfsd4_decode_open, + [OP_OPENATTR] = nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] = nfsd4_decode_open_confirm, + [OP_OPEN_DOWNGRADE] = nfsd4_decode_open_downgrade, + [OP_PUTFH] = nfsd4_decode_putfh, + [OP_PUTPUBFH] = nfsd4_decode_putpubfh, + [OP_PUTROOTFH] = nfsd4_decode_noop, + [OP_READ] = nfsd4_decode_read, + [OP_READDIR] = nfsd4_decode_readdir, + [OP_READLINK] = nfsd4_decode_noop, + [OP_REMOVE] = nfsd4_decode_remove, + [OP_RENAME] = nfsd4_decode_rename, + [OP_RENEW] = nfsd4_decode_renew, + [OP_RESTOREFH] = nfsd4_decode_noop, + [OP_SAVEFH] = nfsd4_decode_noop, + [OP_SECINFO] = nfsd4_decode_secinfo, + [OP_SETATTR] = nfsd4_decode_setattr, + [OP_SETCLIENTID] = nfsd4_decode_setclientid, + [OP_SETCLIENTID_CONFIRM] = nfsd4_decode_setclientid_confirm, + [OP_VERIFY] = nfsd4_decode_verify, + [OP_WRITE] = nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] = nfsd4_decode_release_lockowner, /* new operations for NFSv4.1 */ - [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, - [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session, - [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, - [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, - [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, - [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, - [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BACKCHANNEL_CTL] = nfsd4_decode_backchannel_ctl, + [OP_BIND_CONN_TO_SESSION] = nfsd4_decode_bind_conn_to_session, + [OP_EXCHANGE_ID] = nfsd4_decode_exchange_id, + [OP_CREATE_SESSION] = nfsd4_decode_create_session, + [OP_DESTROY_SESSION] = nfsd4_decode_destroy_session, + [OP_FREE_STATEID] = nfsd4_decode_free_stateid, + [OP_GET_DIR_DELEGATION] = nfsd4_decode_notsupp, #ifdef CONFIG_NFSD_PNFS - [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo, - [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit, - [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget, - [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn, + [OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo, + [OP_GETDEVICELIST] = nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = nfsd4_decode_layoutcommit, + [OP_LAYOUTGET] = nfsd4_decode_layoutget, + [OP_LAYOUTRETURN] = nfsd4_decode_layoutreturn, #else - [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICEINFO] = nfsd4_decode_notsupp, + [OP_GETDEVICELIST] = nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = nfsd4_decode_notsupp, + [OP_LAYOUTGET] = nfsd4_decode_notsupp, + [OP_LAYOUTRETURN] = nfsd4_decode_notsupp, #endif - [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, - [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, - [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, - [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, - [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, + [OP_SECINFO_NO_NAME] = nfsd4_decode_secinfo_no_name, + [OP_SEQUENCE] = nfsd4_decode_sequence, + [OP_SET_SSV] = nfsd4_decode_notsupp, + [OP_TEST_STATEID] = nfsd4_decode_test_stateid, + [OP_WANT_DELEGATION] = nfsd4_decode_notsupp, + [OP_DESTROY_CLIENTID] = nfsd4_decode_destroy_clientid, + [OP_RECLAIM_COMPLETE] = nfsd4_decode_reclaim_complete, /* new operations for NFSv4.2 */ - [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, - [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy, - [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify, - [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, - [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status, - [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, - [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read, - [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, - [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, + [OP_ALLOCATE] = nfsd4_decode_fallocate, + [OP_COPY] = nfsd4_decode_copy, + [OP_COPY_NOTIFY] = nfsd4_decode_copy_notify, + [OP_DEALLOCATE] = nfsd4_decode_fallocate, + [OP_IO_ADVISE] = nfsd4_decode_notsupp, + [OP_LAYOUTERROR] = nfsd4_decode_notsupp, + [OP_LAYOUTSTATS] = nfsd4_decode_notsupp, + [OP_OFFLOAD_CANCEL] = nfsd4_decode_offload_status, + [OP_OFFLOAD_STATUS] = nfsd4_decode_offload_status, + [OP_READ_PLUS] = nfsd4_decode_read, + [OP_SEEK] = nfsd4_decode_seek, + [OP_WRITE_SAME] = nfsd4_decode_notsupp, + [OP_CLONE] = nfsd4_decode_clone, /* RFC 8276 extended atributes operations */ - [OP_GETXATTR] = (nfsd4_dec)nfsd4_decode_getxattr, - [OP_SETXATTR] = (nfsd4_dec)nfsd4_decode_setxattr, - [OP_LISTXATTRS] = (nfsd4_dec)nfsd4_decode_listxattrs, - [OP_REMOVEXATTR] = (nfsd4_dec)nfsd4_decode_removexattr, + [OP_GETXATTR] = nfsd4_decode_getxattr, + [OP_SETXATTR] = nfsd4_decode_setxattr, + [OP_LISTXATTRS] = nfsd4_decode_listxattrs, + [OP_REMOVEXATTR] = nfsd4_decode_removexattr, }; static inline bool @@ -2482,6 +2541,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, return p; } +static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr, + struct timespec64 *tv) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 3); + if (!p) + return nfserr_resource; + + p = xdr_encode_hyper(p, (s64)tv->tv_sec); + *p = cpu_to_be32(tv->tv_nsec); + return nfs_ok; +} + /* * ctime (in NFSv4, time_metadata) is not writeable, and the client * doesn't really care what resolution could theoretically be stored by @@ -3287,11 +3360,14 @@ out_acl: p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); - *p++ = cpu_to_be32(stat.atime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.atime); + if (status) + goto out; + } + if (bmval1 & FATTR4_WORD1_TIME_CREATE) { + status = nfsd4_encode_nfstime4(xdr, &stat.btime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { p = xdr_reserve_space(xdr, 12); @@ -3300,25 +3376,14 @@ out_acl: p = encode_time_delta(p, d_inode(dentry)); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); - *p++ = cpu_to_be32(stat.ctime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.ctime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); - *p++ = cpu_to_be32(stat.mtime.tv_nsec); - } - if (bmval1 & FATTR4_WORD1_TIME_CREATE) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec); - *p++ = cpu_to_be32(stat.btime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.mtime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { u64 ino = stat.ino; @@ -3643,8 +3708,10 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) } static __be32 -nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) +nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_access *access = &u->access; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3656,8 +3723,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ return 0; } -static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) +static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3673,8 +3742,10 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, } static __be32 -nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) +nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_close *close = &u->close; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &close->cl_stateid); @@ -3682,8 +3753,10 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 -nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) +nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_commit *commit = &u->commit; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3696,8 +3769,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) +nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_create *create = &u->create; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3710,8 +3785,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) +nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_getattr *getattr = &u->getattr; struct svc_fh *fhp = getattr->ga_fhp; struct xdr_stream *xdr = resp->xdr; @@ -3720,8 +3797,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } static __be32 -nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) +nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct svc_fh **fhpp = &u->getfh; struct xdr_stream *xdr = resp->xdr; struct svc_fh *fhp = *fhpp; unsigned int len; @@ -3775,8 +3854,10 @@ again: } static __be32 -nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) +nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_lock *lock = &u->lock; struct xdr_stream *xdr = resp->xdr; if (!nfserr) @@ -3788,8 +3869,10 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo } static __be32 -nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) +nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_lockt *lockt = &u->lockt; struct xdr_stream *xdr = resp->xdr; if (nfserr == nfserr_denied) @@ -3798,8 +3881,10 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l } static __be32 -nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) +nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_locku *locku = &u->locku; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &locku->lu_stateid); @@ -3807,8 +3892,10 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 -nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) +nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_link *link = &u->link; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3821,8 +3908,10 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 -nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) +nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open *open = &u->open; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3915,16 +4004,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op } static __be32 -nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) +nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open_confirm *oc = &u->open_confirm; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); } static __be32 -nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) +nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open_downgrade *od = &u->open_downgrade; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &od->od_stateid); @@ -4023,8 +4116,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) + union nfsd4_op_u *u) { + struct nfsd4_read *read = &u->read; bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); unsigned long maxcount; struct xdr_stream *xdr = resp->xdr; @@ -4065,8 +4159,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_readlink *readlink = &u->readlink; __be32 *p, *maxcount_p, zero = xdr_zero; struct xdr_stream *xdr = resp->xdr; int length_offset = xdr->buf->len; @@ -4110,8 +4206,10 @@ out_err: } static __be32 -nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) +nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_readdir *readdir = &u->readdir; int maxcount; int bytes_left; loff_t offset; @@ -4201,8 +4299,10 @@ err_no_verf: } static __be32 -nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) +nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_remove *remove = &u->remove; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4214,8 +4314,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) +nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_rename *rename = &u->rename; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4297,8 +4399,9 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_secinfo *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo *secinfo = &u->secinfo; struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); @@ -4306,8 +4409,9 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_secinfo_no_name *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name; struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); @@ -4318,8 +4422,10 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, * regardless of the error status. */ static __be32 -nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) +nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_setattr *setattr = &u->setattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4342,8 +4448,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } static __be32 -nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) +nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_setclientid *scd = &u->setclientid; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4366,8 +4474,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n } static __be32 -nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) +nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_write *write = &u->write; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4383,8 +4493,9 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_exchange_id *exid) + union nfsd4_op_u *u) { + struct nfsd4_exchange_id *exid = &u->exchange_id; struct xdr_stream *xdr = resp->xdr; __be32 *p; char *major_id; @@ -4461,8 +4572,9 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_create_session *sess) + union nfsd4_op_u *u) { + struct nfsd4_create_session *sess = &u->create_session; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4514,8 +4626,9 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_sequence *seq) + union nfsd4_op_u *u) { + struct nfsd4_sequence *seq = &u->sequence; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4537,8 +4650,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_test_stateid *test_stateid) + union nfsd4_op_u *u) { + struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct xdr_stream *xdr = resp->xdr; struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; @@ -4558,8 +4672,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_getdeviceinfo *gdev) + union nfsd4_op_u *u) { + struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; u32 starting_len = xdr->buf->len, needed_len; @@ -4611,8 +4726,9 @@ toosmall: static __be32 nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutget *lgp) + union nfsd4_op_u *u) { + struct nfsd4_layoutget *lgp = &u->layoutget; struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; __be32 *p; @@ -4638,8 +4754,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutcommit *lcp) + union nfsd4_op_u *u) { + struct nfsd4_layoutcommit *lcp = &u->layoutcommit; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4659,8 +4776,9 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutreturn *lrp) + union nfsd4_op_u *u) { + struct nfsd4_layoutreturn *lrp = &u->layoutreturn; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4745,8 +4863,9 @@ nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) static __be32 nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_copy *copy) + union nfsd4_op_u *u) { + struct nfsd4_copy *copy = &u->copy; __be32 *p; nfserr = nfsd42_encode_write_res(resp, ©->cp_res, @@ -4762,8 +4881,9 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_offload_status *os) + union nfsd4_op_u *u) { + struct nfsd4_offload_status *os = &u->offload_status; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4777,156 +4897,83 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, - struct nfsd4_read *read, - unsigned long *maxcount, u32 *eof, - loff_t *pos) + struct nfsd4_read *read) { - struct xdr_stream *xdr = resp->xdr; + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); struct file *file = read->rd_nf->nf_file; - int starting_len = xdr->buf->len; - loff_t hole_pos; - __be32 nfserr; - __be32 *p, tmp; - __be64 tmp64; - - hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE); - if (hole_pos > read->rd_offset) - *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset); - *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len)); + struct xdr_stream *xdr = resp->xdr; + unsigned long maxcount; + __be32 nfserr, *p; /* Content type, offset, byte count */ p = xdr_reserve_space(xdr, 4 + 8 + 4); if (!p) - return nfserr_resource; + return nfserr_io; + if (resp->xdr->buf->page_len && splice_ok) { + WARN_ON_ONCE(splice_ok); + return nfserr_serverfault; + } - read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount); - if (read->rd_vlen < 0) - return nfserr_resource; + maxcount = min_t(unsigned long, read->rd_length, + (xdr->buf->buflen - xdr->buf->len)); - nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, - resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof); + if (file->f_op->splice_read && splice_ok) + nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); + else + nfserr = nfsd4_encode_readv(resp, read, file, maxcount); if (nfserr) return nfserr; - xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount)); - tmp = htonl(NFS4_CONTENT_DATA); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp64 = cpu_to_be64(read->rd_offset); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); - tmp = htonl(*maxcount); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); - - tmp = xdr_zero; - write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp, - xdr_pad_size(*maxcount)); - return nfs_ok; -} - -static __be32 -nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, - struct nfsd4_read *read, - unsigned long *maxcount, u32 *eof) -{ - struct file *file = read->rd_nf->nf_file; - loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA); - loff_t f_size = i_size_read(file_inode(file)); - unsigned long count; - __be32 *p; - - if (data_pos == -ENXIO) - data_pos = f_size; - else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE)) - return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size); - count = data_pos - read->rd_offset; - - /* Content type, offset, byte count */ - p = xdr_reserve_space(resp->xdr, 4 + 8 + 8); - if (!p) - return nfserr_resource; - - *p++ = htonl(NFS4_CONTENT_HOLE); + *p++ = cpu_to_be32(NFS4_CONTENT_DATA); p = xdr_encode_hyper(p, read->rd_offset); - p = xdr_encode_hyper(p, count); + *p = cpu_to_be32(read->rd_length); - *eof = (read->rd_offset + count) >= f_size; - *maxcount = min_t(unsigned long, count, *maxcount); return nfs_ok; } static __be32 nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) + union nfsd4_op_u *u) { - unsigned long maxcount, count; + struct nfsd4_read *read = &u->read; + struct file *file = read->rd_nf->nf_file; struct xdr_stream *xdr = resp->xdr; - struct file *file; int starting_len = xdr->buf->len; - int last_segment = xdr->buf->len; - int segments = 0; - __be32 *p, tmp; - bool is_data; - loff_t pos; - u32 eof; + u32 segments = 0; + __be32 *p; if (nfserr) return nfserr; - file = read->rd_nf->nf_file; /* eof flag, segment count */ p = xdr_reserve_space(xdr, 4 + 4); if (!p) - return nfserr_resource; + return nfserr_io; xdr_commit_encode(xdr); - maxcount = min_t(unsigned long, read->rd_length, - (xdr->buf->buflen - xdr->buf->len)); - count = maxcount; - - eof = read->rd_offset >= i_size_read(file_inode(file)); - if (eof) + read->rd_eof = read->rd_offset >= i_size_read(file_inode(file)); + if (read->rd_eof) goto out; - pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE); - is_data = pos > read->rd_offset; - - while (count > 0 && !eof) { - maxcount = count; - if (is_data) - nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof, - segments == 0 ? &pos : NULL); - else - nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof); - if (nfserr) - goto out; - count -= maxcount; - read->rd_offset += maxcount; - is_data = !is_data; - last_segment = xdr->buf->len; - segments++; + nfserr = nfsd4_encode_read_plus_data(resp, read); + if (nfserr) { + xdr_truncate_encode(xdr, starting_len); + return nfserr; } + segments++; + out: - if (nfserr && segments == 0) - xdr_truncate_encode(xdr, starting_len); - else { - if (nfserr) { - xdr_truncate_encode(xdr, last_segment); - nfserr = nfs_ok; - eof = 0; - } - tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp = htonl(segments); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); - } - + p = xdr_encode_bool(p, read->rd_eof); + *p = cpu_to_be32(segments); return nfserr; } static __be32 nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_copy_notify *cn) + union nfsd4_op_u *u) { + struct nfsd4_copy_notify *cn = &u->copy_notify; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4960,8 +5007,9 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_seek *seek) + union nfsd4_op_u *u) { + struct nfsd4_seek *seek = &u->seek; __be32 *p; p = xdr_reserve_space(resp->xdr, 4 + 8); @@ -4972,7 +5020,8 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) +nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *p) { return nfserr; } @@ -5023,8 +5072,9 @@ nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen) static __be32 nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_getxattr *getxattr) + union nfsd4_op_u *u) { + struct nfsd4_getxattr *getxattr = &u->getxattr; struct xdr_stream *xdr = resp->xdr; __be32 *p, err; @@ -5047,8 +5097,9 @@ nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_setxattr *setxattr) + union nfsd4_op_u *u) { + struct nfsd4_setxattr *setxattr = &u->setxattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -5088,8 +5139,9 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs, static __be32 nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_listxattrs *listxattrs) + union nfsd4_op_u *u) { + struct nfsd4_listxattrs *listxattrs = &u->listxattrs; struct xdr_stream *xdr = resp->xdr; u32 cookie_offset, count_offset, eof; u32 left, xdrleft, slen, count; @@ -5199,8 +5251,9 @@ out: static __be32 nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_removexattr *removexattr) + union nfsd4_op_u *u) { + struct nfsd4_removexattr *removexattr = &u->removexattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -5212,7 +5265,7 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, return 0; } -typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); +typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u); /* * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1 @@ -5220,93 +5273,93 @@ typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); * done in the decoding phase. */ static const nfsd4_enc nfsd4_enc_ops[] = { - [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, - [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, - [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit, - [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create, - [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr, - [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh, - [OP_LINK] = (nfsd4_enc)nfsd4_encode_link, - [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock, - [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt, - [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku, - [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop, - [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open, - [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm, - [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade, - [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_READ] = (nfsd4_enc)nfsd4_encode_read, - [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir, - [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink, - [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove, - [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename, - [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop, - [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo, - [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr, - [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid, - [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop, - [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, - [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, - [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, + [OP_ACCESS] = nfsd4_encode_access, + [OP_CLOSE] = nfsd4_encode_close, + [OP_COMMIT] = nfsd4_encode_commit, + [OP_CREATE] = nfsd4_encode_create, + [OP_DELEGPURGE] = nfsd4_encode_noop, + [OP_DELEGRETURN] = nfsd4_encode_noop, + [OP_GETATTR] = nfsd4_encode_getattr, + [OP_GETFH] = nfsd4_encode_getfh, + [OP_LINK] = nfsd4_encode_link, + [OP_LOCK] = nfsd4_encode_lock, + [OP_LOCKT] = nfsd4_encode_lockt, + [OP_LOCKU] = nfsd4_encode_locku, + [OP_LOOKUP] = nfsd4_encode_noop, + [OP_LOOKUPP] = nfsd4_encode_noop, + [OP_NVERIFY] = nfsd4_encode_noop, + [OP_OPEN] = nfsd4_encode_open, + [OP_OPENATTR] = nfsd4_encode_noop, + [OP_OPEN_CONFIRM] = nfsd4_encode_open_confirm, + [OP_OPEN_DOWNGRADE] = nfsd4_encode_open_downgrade, + [OP_PUTFH] = nfsd4_encode_noop, + [OP_PUTPUBFH] = nfsd4_encode_noop, + [OP_PUTROOTFH] = nfsd4_encode_noop, + [OP_READ] = nfsd4_encode_read, + [OP_READDIR] = nfsd4_encode_readdir, + [OP_READLINK] = nfsd4_encode_readlink, + [OP_REMOVE] = nfsd4_encode_remove, + [OP_RENAME] = nfsd4_encode_rename, + [OP_RENEW] = nfsd4_encode_noop, + [OP_RESTOREFH] = nfsd4_encode_noop, + [OP_SAVEFH] = nfsd4_encode_noop, + [OP_SECINFO] = nfsd4_encode_secinfo, + [OP_SETATTR] = nfsd4_encode_setattr, + [OP_SETCLIENTID] = nfsd4_encode_setclientid, + [OP_SETCLIENTID_CONFIRM] = nfsd4_encode_noop, + [OP_VERIFY] = nfsd4_encode_noop, + [OP_WRITE] = nfsd4_encode_write, + [OP_RELEASE_LOCKOWNER] = nfsd4_encode_noop, /* NFSv4.1 operations */ - [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, - [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session, - [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, - [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, - [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, - [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_BACKCHANNEL_CTL] = nfsd4_encode_noop, + [OP_BIND_CONN_TO_SESSION] = nfsd4_encode_bind_conn_to_session, + [OP_EXCHANGE_ID] = nfsd4_encode_exchange_id, + [OP_CREATE_SESSION] = nfsd4_encode_create_session, + [OP_DESTROY_SESSION] = nfsd4_encode_noop, + [OP_FREE_STATEID] = nfsd4_encode_noop, + [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop, #ifdef CONFIG_NFSD_PNFS - [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo, - [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit, - [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget, - [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn, + [OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo, + [OP_GETDEVICELIST] = nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = nfsd4_encode_layoutcommit, + [OP_LAYOUTGET] = nfsd4_encode_layoutget, + [OP_LAYOUTRETURN] = nfsd4_encode_layoutreturn, #else - [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETDEVICEINFO] = nfsd4_encode_noop, + [OP_GETDEVICELIST] = nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = nfsd4_encode_noop, + [OP_LAYOUTGET] = nfsd4_encode_noop, + [OP_LAYOUTRETURN] = nfsd4_encode_noop, #endif - [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, - [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, - [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, - [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid, - [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, - [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, - [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SECINFO_NO_NAME] = nfsd4_encode_secinfo_no_name, + [OP_SEQUENCE] = nfsd4_encode_sequence, + [OP_SET_SSV] = nfsd4_encode_noop, + [OP_TEST_STATEID] = nfsd4_encode_test_stateid, + [OP_WANT_DELEGATION] = nfsd4_encode_noop, + [OP_DESTROY_CLIENTID] = nfsd4_encode_noop, + [OP_RECLAIM_COMPLETE] = nfsd4_encode_noop, /* NFSv4.2 operations */ - [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy, - [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify, - [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status, - [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus, - [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, - [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, - [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_ALLOCATE] = nfsd4_encode_noop, + [OP_COPY] = nfsd4_encode_copy, + [OP_COPY_NOTIFY] = nfsd4_encode_copy_notify, + [OP_DEALLOCATE] = nfsd4_encode_noop, + [OP_IO_ADVISE] = nfsd4_encode_noop, + [OP_LAYOUTERROR] = nfsd4_encode_noop, + [OP_LAYOUTSTATS] = nfsd4_encode_noop, + [OP_OFFLOAD_CANCEL] = nfsd4_encode_noop, + [OP_OFFLOAD_STATUS] = nfsd4_encode_offload_status, + [OP_READ_PLUS] = nfsd4_encode_read_plus, + [OP_SEEK] = nfsd4_encode_seek, + [OP_WRITE_SAME] = nfsd4_encode_noop, + [OP_CLONE] = nfsd4_encode_noop, /* RFC 8276 extended atributes operations */ - [OP_GETXATTR] = (nfsd4_enc)nfsd4_encode_getxattr, - [OP_SETXATTR] = (nfsd4_enc)nfsd4_encode_setxattr, - [OP_LISTXATTRS] = (nfsd4_enc)nfsd4_encode_listxattrs, - [OP_REMOVEXATTR] = (nfsd4_enc)nfsd4_encode_removexattr, + [OP_GETXATTR] = nfsd4_encode_getxattr, + [OP_SETXATTR] = nfsd4_encode_setxattr, + [OP_LISTXATTRS] = nfsd4_encode_listxattrs, + [OP_REMOVEXATTR] = nfsd4_encode_removexattr, }; /* diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 573de0d49e17..76a60e7a7509 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -581,7 +581,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET; switch(num) { +#ifdef CONFIG_NFSD_V2 case 2: +#endif case 3: nfsd_vers(nn, num, cmd); break; @@ -601,7 +603,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) } break; default: - return -EINVAL; + /* Ignore requests to disable non-existent versions */ + if (cmd == NFSD_SET) + return -EINVAL; } vers += len + 1; } while ((len = qword_get(&mesg, vers, size)) > 0); @@ -1448,9 +1452,7 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; - retval = nfsd4_init_leases_net(nn); - if (retval) - goto out_drc_error; + nfsd4_init_leases_net(nn); retval = nfsd_reply_cache_init(nn); if (retval) goto out_cache_error; @@ -1460,8 +1462,6 @@ static __net_init int nfsd_init_net(struct net *net) return 0; out_cache_error: - nfsd4_leases_net_shutdown(nn); -out_drc_error: nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); @@ -1477,7 +1477,6 @@ static __net_exit void nfsd_exit_net(struct net *net) nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); - nfsd4_leases_net_shutdown(nn); } static struct pernet_operations nfsd_net_ops = { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 09726c5b9a31..fa0144a74267 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -64,8 +64,7 @@ struct readdir_cd { extern struct svc_program nfsd_program; -extern const struct svc_version nfsd_version2, nfsd_version3, - nfsd_version4; +extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern struct mutex nfsd_mutex; extern spinlock_t nfsd_drc_lock; extern unsigned long nfsd_drc_max_mem; @@ -505,8 +504,7 @@ extern void unregister_cld_notifier(void); extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); #endif -extern int nfsd4_init_leases_net(struct nfsd_net *nn); -extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn); +extern void nfsd4_init_leases_net(struct nfsd_net *nn); #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) @@ -514,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry) return 0; } -static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; }; -static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {}; +static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { }; #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index c3ae6414fc5c..513e028b0bbe 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -220,7 +220,7 @@ __be32 fh_update(struct svc_fh *); void fh_put(struct svc_fh *); static __inline__ struct svc_fh * -fh_copy(struct svc_fh *dst, struct svc_fh *src) +fh_copy(struct svc_fh *dst, const struct svc_fh *src) { WARN_ON(src->fh_dentry); @@ -229,7 +229,7 @@ fh_copy(struct svc_fh *dst, struct svc_fh *src) } static inline void -fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) +fh_copy_shallow(struct knfsd_fh *dst, const struct knfsd_fh *src) { dst->fh_size = src->fh_size; memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size); @@ -243,7 +243,8 @@ fh_init(struct svc_fh *fhp, int maxsize) return fhp; } -static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) +static inline bool fh_match(const struct knfsd_fh *fh1, + const struct knfsd_fh *fh2) { if (fh1->fh_size != fh2->fh_size) return false; @@ -252,7 +253,8 @@ static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) return true; } -static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) +static inline bool fh_fsid_match(const struct knfsd_fh *fh1, + const struct knfsd_fh *fh2) { if (fh1->fh_fsid_type != fh2->fh_fsid_type) return false; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 82b3ddeacc33..9744443c3965 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - return rpc_drop_reply; + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - return rpc_drop_reply; + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -848,65 +848,3 @@ const struct svc_version nfsd_version2 = { .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS2_SVC_XDRSIZE, }; - -/* - * Map errnos to NFS errnos. - */ -__be32 -nfserrno (int errno) -{ - static struct { - __be32 nfserr; - int syserr; - } nfs_errtbl[] = { - { nfs_ok, 0 }, - { nfserr_perm, -EPERM }, - { nfserr_noent, -ENOENT }, - { nfserr_io, -EIO }, - { nfserr_nxio, -ENXIO }, - { nfserr_fbig, -E2BIG }, - { nfserr_stale, -EBADF }, - { nfserr_acces, -EACCES }, - { nfserr_exist, -EEXIST }, - { nfserr_xdev, -EXDEV }, - { nfserr_mlink, -EMLINK }, - { nfserr_nodev, -ENODEV }, - { nfserr_notdir, -ENOTDIR }, - { nfserr_isdir, -EISDIR }, - { nfserr_inval, -EINVAL }, - { nfserr_fbig, -EFBIG }, - { nfserr_nospc, -ENOSPC }, - { nfserr_rofs, -EROFS }, - { nfserr_mlink, -EMLINK }, - { nfserr_nametoolong, -ENAMETOOLONG }, - { nfserr_notempty, -ENOTEMPTY }, -#ifdef EDQUOT - { nfserr_dquot, -EDQUOT }, -#endif - { nfserr_stale, -ESTALE }, - { nfserr_jukebox, -ETIMEDOUT }, - { nfserr_jukebox, -ERESTARTSYS }, - { nfserr_jukebox, -EAGAIN }, - { nfserr_jukebox, -EWOULDBLOCK }, - { nfserr_jukebox, -ENOMEM }, - { nfserr_io, -ETXTBSY }, - { nfserr_notsupp, -EOPNOTSUPP }, - { nfserr_toosmall, -ETOOSMALL }, - { nfserr_serverfault, -ESERVERFAULT }, - { nfserr_serverfault, -ENFILE }, - { nfserr_io, -EREMOTEIO }, - { nfserr_stale, -EOPENSTALE }, - { nfserr_io, -EUCLEAN }, - { nfserr_perm, -ENOKEY }, - { nfserr_no_grace, -ENOGRACE}, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { - if (nfs_errtbl[i].syserr == errno) - return nfs_errtbl[i].nfserr; - } - WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); - return nfserr_io; -} - diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c7695ebd28dc..0c75636054a5 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -91,8 +91,12 @@ unsigned long nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static const struct svc_version *nfsd_acl_version[] = { +# if defined(CONFIG_NFSD_V2_ACL) [2] = &nfsd_acl_version2, +# endif +# if defined(CONFIG_NFSD_V3_ACL) [3] = &nfsd_acl_version3, +# endif }; #define NFSD_ACL_MINVERS 2 @@ -116,7 +120,9 @@ static struct svc_stat nfsd_acl_svcstats = { #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ static const struct svc_version *nfsd_version[] = { +#if defined(CONFIG_NFSD_V2) [2] = &nfsd_version2, +#endif [3] = &nfsd_version3, #if defined(CONFIG_NFSD_V4) [4] = &nfsd_version4, @@ -1065,7 +1071,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0); *statp = proc->pc_func(rqstp); - if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags)) + if (test_bit(RQ_DROPME, &rqstp->rq_flags)) goto out_update_drop; if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e2daef3cc003..e94634d30591 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -368,6 +368,7 @@ struct nfs4_client { #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 1 << NFSD4_CLIENT_CB_KILL) +#define NFSD4_CLIENT_CB_RECALL_ANY (6) unsigned long cl_flags; const struct cred *cl_cb_cred; struct rpc_clnt *cl_cb_client; @@ -411,6 +412,10 @@ struct nfs4_client { unsigned int cl_state; atomic_t cl_delegs_in_recall; + + struct nfsd4_cb_recall_any *cl_ra; + time64_t cl_ra_time; + struct list_head cl_ra_cblist; }; /* struct nfs4_client_reset @@ -536,16 +541,13 @@ struct nfs4_clnt_odstate { * inode can have multiple filehandles associated with it, so there is * (potentially) a many to one relationship between this struct and struct * inode. - * - * These are hashed by filehandle in the file_hashtbl, which is protected by - * the global state_lock spinlock. */ struct nfs4_file { refcount_t fi_ref; struct inode * fi_inode; bool fi_aliased; spinlock_t fi_lock; - struct hlist_node fi_hash; /* hash on fi_fhandle */ + struct rhlist_head fi_rlist; struct list_head fi_stateids; union { struct list_head fi_delegations; @@ -639,6 +641,7 @@ enum nfsd4_cb_op { NFSPROC4_CLNT_CB_OFFLOAD, NFSPROC4_CLNT_CB_SEQUENCE, NFSPROC4_CLNT_CB_NOTIFY_LOCK, + NFSPROC4_CLNT_CB_RECALL_ANY, }; /* Returns true iff a is later than b: */ diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 132335011cca..84f26f281fe9 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -9,9 +9,12 @@ #define _NFSD_TRACE_H #include +#include +#include #include "export.h" #include "nfsfh.h" +#include "xdr4.h" #define NFSD_TRACE_PROC_RES_FIELDS \ __field(unsigned int, netns_ino) \ @@ -604,6 +607,7 @@ DEFINE_STATEID_EVENT(layout_recall_release); DEFINE_STATEID_EVENT(open); DEFINE_STATEID_EVENT(deleg_read); +DEFINE_STATEID_EVENT(deleg_return); DEFINE_STATEID_EVENT(deleg_recall); DECLARE_EVENT_CLASS(nfsd_stateseqid_class, @@ -636,6 +640,61 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \ DEFINE_STATESEQID_EVENT(preprocess); DEFINE_STATESEQID_EVENT(open_confirm); +TRACE_DEFINE_ENUM(NFS4_OPEN_STID); +TRACE_DEFINE_ENUM(NFS4_LOCK_STID); +TRACE_DEFINE_ENUM(NFS4_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_CLOSED_STID); +TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID); + +#define show_stid_type(x) \ + __print_flags(x, "|", \ + { NFS4_OPEN_STID, "OPEN" }, \ + { NFS4_LOCK_STID, "LOCK" }, \ + { NFS4_DELEG_STID, "DELEG" }, \ + { NFS4_CLOSED_STID, "CLOSED" }, \ + { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \ + { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \ + { NFS4_LAYOUT_STID, "LAYOUT" }) + +DECLARE_EVENT_CLASS(nfsd_stid_class, + TP_PROTO( + const struct nfs4_stid *stid + ), + TP_ARGS(stid), + TP_STRUCT__entry( + __field(unsigned long, sc_type) + __field(int, sc_count) + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, si_id) + __field(u32, si_generation) + ), + TP_fast_assign( + const stateid_t *stp = &stid->sc_stateid; + + __entry->sc_type = stid->sc_type; + __entry->sc_count = refcount_read(&stid->sc_count); + __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; + __entry->cl_id = stp->si_opaque.so_clid.cl_id; + __entry->si_id = stp->si_opaque.so_id; + __entry->si_generation = stp->si_generation; + ), + TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s", + __entry->cl_boot, __entry->cl_id, + __entry->si_id, __entry->si_generation, + __entry->sc_count, show_stid_type(__entry->sc_type) + ) +); + +#define DEFINE_STID_EVENT(name) \ +DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \ + TP_PROTO(const struct nfs4_stid *stid), \ + TP_ARGS(stid)) + +DEFINE_STID_EVENT(revoke); + DECLARE_EVENT_CLASS(nfsd_clientid_class, TP_PROTO(const clientid_t *clid), TP_ARGS(clid), @@ -783,7 +842,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class, __array(unsigned char, addr, sizeof(struct sockaddr_in6)) __field(unsigned long, flavor) __array(unsigned char, verifier, NFS4_VERIFIER_SIZE) - __string_len(name, name, clp->cl_name.len) + __string_len(name, clp->cl_name.data, clp->cl_name.len) ), TP_fast_assign( __entry->cl_boot = clp->cl_clientid.cl_boot; @@ -1436,6 +1495,32 @@ TRACE_EVENT(nfsd_cb_offload, __entry->fh_hash, __entry->count, __entry->status) ); +TRACE_EVENT(nfsd_cb_recall_any, + TP_PROTO( + const struct nfsd4_cb_recall_any *ra + ), + TP_ARGS(ra), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, keep) + __field(unsigned long, bmval0) + __sockaddr(addr, ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot; + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id; + __entry->keep = ra->ra_keep; + __entry->bmval0 = ra->ra_bmval[0]; + __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr, + ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen); + ), + TP_printk("addr=%pISpc client %08x:%08x keep=%u bmval0=%s", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->keep, show_rca_mask(__entry->bmval0) + ) +); + DECLARE_EVENT_CLASS(nfsd_cb_done_class, TP_PROTO( const stateid_t *stp, @@ -1475,6 +1560,27 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done); +TRACE_EVENT(nfsd_cb_recall_any_done, + TP_PROTO( + const struct nfsd4_callback *cb, + const struct rpc_task *task + ), + TP_ARGS(cb, task), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(int, status) + ), + TP_fast_assign( + __entry->status = task->tk_status; + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot; + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id; + ), + TP_printk("client %08x:%08x status=%d", + __entry->cl_boot, __entry->cl_id, __entry->status + ) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index eccc6ce55a63..5d6a61d47a90 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -49,6 +49,69 @@ #define NFSDDBG_FACILITY NFSDDBG_FILEOP +/** + * nfserrno - Map Linux errnos to NFS errnos + * @errno: POSIX(-ish) error code to be mapped + * + * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). If + * it's an error we don't expect, log it once and return nfserr_io. + */ +__be32 +nfserrno (int errno) +{ + static struct { + __be32 nfserr; + int syserr; + } nfs_errtbl[] = { + { nfs_ok, 0 }, + { nfserr_perm, -EPERM }, + { nfserr_noent, -ENOENT }, + { nfserr_io, -EIO }, + { nfserr_nxio, -ENXIO }, + { nfserr_fbig, -E2BIG }, + { nfserr_stale, -EBADF }, + { nfserr_acces, -EACCES }, + { nfserr_exist, -EEXIST }, + { nfserr_xdev, -EXDEV }, + { nfserr_mlink, -EMLINK }, + { nfserr_nodev, -ENODEV }, + { nfserr_notdir, -ENOTDIR }, + { nfserr_isdir, -EISDIR }, + { nfserr_inval, -EINVAL }, + { nfserr_fbig, -EFBIG }, + { nfserr_nospc, -ENOSPC }, + { nfserr_rofs, -EROFS }, + { nfserr_mlink, -EMLINK }, + { nfserr_nametoolong, -ENAMETOOLONG }, + { nfserr_notempty, -ENOTEMPTY }, + { nfserr_dquot, -EDQUOT }, + { nfserr_stale, -ESTALE }, + { nfserr_jukebox, -ETIMEDOUT }, + { nfserr_jukebox, -ERESTARTSYS }, + { nfserr_jukebox, -EAGAIN }, + { nfserr_jukebox, -EWOULDBLOCK }, + { nfserr_jukebox, -ENOMEM }, + { nfserr_io, -ETXTBSY }, + { nfserr_notsupp, -EOPNOTSUPP }, + { nfserr_toosmall, -ETOOSMALL }, + { nfserr_serverfault, -ESERVERFAULT }, + { nfserr_serverfault, -ENFILE }, + { nfserr_io, -EREMOTEIO }, + { nfserr_stale, -EOPENSTALE }, + { nfserr_io, -EUCLEAN }, + { nfserr_perm, -ENOKEY }, + { nfserr_no_grace, -ENOGRACE}, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { + if (nfs_errtbl[i].syserr == errno) + return nfs_errtbl[i].nfserr; + } + WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); + return nfserr_io; +} + /* * Called from nfsd_lookup and encode_dirent. Check if we have crossed * a mount point. @@ -1317,7 +1380,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, iap->ia_mode &= ~current_umask(); err = 0; - host_err = 0; switch (type) { case S_IFREG: host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 9744b041105b..dbdfef7ae85b 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -60,6 +60,7 @@ static inline void nfsd_attrs_free(struct nfsd_attrs *attrs) posix_acl_release(attrs->na_dpacl); } +__be32 nfserrno (int errno); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 36c3340c1d54..510978e602da 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -896,5 +896,10 @@ struct nfsd4_operation { union nfsd4_op_u *); }; +struct nfsd4_cb_recall_any { + struct nfsd4_callback ra_cb; + u32 ra_keep; + u32 ra_bmval[1]; +}; #endif diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index 547cf07cf4e0..0d39af1b00a0 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -48,3 +48,9 @@ #define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \ cb_sequence_dec_sz + \ op_dec_sz) +#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + 1 + 1) +#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 40ce92a332fe..146640f0607a 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -724,7 +724,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, dat = nilfs_bmap_get_dat(btree); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) - goto out; + goto dat_error; ptr = blocknr; } cnt = 1; @@ -743,7 +743,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) - goto out; + goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt || ++cnt == maxblocks) @@ -781,6 +781,11 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, out: nilfs_btree_free_path(path); return ret; + + dat_error: + if (ret == -ENOENT) + ret = -EINVAL; /* Notify bmap layer of metadata corruption */ + goto out; } static void nilfs_btree_promote_key(struct nilfs_bmap *btree, diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 9930fa901039..1e7f653c1df7 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -40,8 +40,21 @@ static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) static int nilfs_dat_prepare_entry(struct inode *dat, struct nilfs_palloc_req *req, int create) { - return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, - create, &req->pr_entry_bh); + int ret; + + ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, + create, &req->pr_entry_bh); + if (unlikely(ret == -ENOENT)) { + nilfs_err(dat->i_sb, + "DAT doesn't have a block to manage vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + /* + * Return internal code -EINVAL to notify bmap layer of + * metadata corruption. + */ + ret = -EINVAL; + } + return ret; } static void nilfs_dat_commit_entry(struct inode *dat, @@ -123,11 +136,7 @@ static void nilfs_dat_commit_free(struct inode *dat, int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) { - int ret; - - ret = nilfs_dat_prepare_entry(dat, req, 0); - WARN_ON(ret == -ENOENT); - return ret; + return nilfs_dat_prepare_entry(dat, req, 0); } void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, @@ -154,10 +163,8 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) int ret; ret = nilfs_dat_prepare_entry(dat, req, 0); - if (ret < 0) { - WARN_ON(ret == -ENOENT); + if (ret < 0) return ret; - } kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index a35f2795b242..8f802f7b0840 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -66,7 +66,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, dat = nilfs_bmap_get_dat(direct); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) - return ret; + goto dat_error; ptr = blocknr; } @@ -79,7 +79,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) - return ret; + goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt) @@ -87,6 +87,11 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, } *ptrp = ptr; return cnt; + + dat_error: + if (ret == -ENOENT) + ret = -EINVAL; /* Notify bmap layer of metadata corruption */ + return ret; } static __u64 diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index a265d391ffe9..822e8d95d31e 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -105,7 +105,13 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf) nilfs_transaction_commit(inode->i_sb); mapped: - wait_for_stable_page(page); + /* + * Since checksumming including data blocks is performed to determine + * the validity of the log to be written and used for recovery, it is + * necessary to wait for writeback to finish here, regardless of the + * stable write requirement of the backing device. + */ + wait_on_page_writeback(page); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index f625872321cc..8eb4288d46fe 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -112,7 +112,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", __func__, inode->i_ino, (unsigned long long)blkoff); - err = 0; + err = -EAGAIN; } nilfs_transaction_abort(inode->i_sb); goto out; diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 0955b657938f..a9b8d77c8c1d 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, struct nilfs_recovery_block *rb, - struct page *page) + loff_t pos, struct page *page) { struct buffer_head *bh_org; + size_t from = pos & ~PAGE_MASK; void *kaddr; bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize); @@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, return -EIO; kaddr = kmap_atomic(page); - memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); + memcpy(kaddr + from, bh_org->b_data, bh_org->b_size); kunmap_atomic(kaddr); brelse(bh_org); return 0; @@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, goto failed_inode; } - err = nilfs_recovery_copy_block(nilfs, rb, page); + err = nilfs_recovery_copy_block(nilfs, rb, pos, page); if (unlikely(err)) goto failed_page; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a4a147a983e0..0a84613960db 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1702,7 +1702,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - set_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { lock_page(bd_page); @@ -1713,6 +1712,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) } break; } + set_buffer_async_write(bh); if (bh->b_page != fs_page) { nilfs_begin_page_io(fs_page); fs_page = bh->b_page; @@ -1798,7 +1798,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - clear_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { clear_buffer_uptodate(bh); if (bh->b_page != bd_page) { @@ -1807,6 +1806,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) } break; } + clear_buffer_async_write(bh); if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, err); fs_page = bh->b_page; @@ -1894,8 +1894,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) BIT(BH_Delay) | BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Redirected)); - set_mask_bits(&bh->b_state, clear_bits, set_bits); if (bh == segbuf->sb_super_root) { + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); if (bh->b_page != bd_page) { end_page_writeback(bd_page); bd_page = bh->b_page; @@ -1903,6 +1904,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) update_sr = true; break; } + set_mask_bits(&bh->b_state, clear_bits, set_bits); if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, 0); fs_page = bh->b_page; diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 2215179c925b..2618bf5a3789 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1658,8 +1658,10 @@ repack: le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); - if (!attr_b) - return -ENOENT; + if (!attr_b) { + err = -ENOENT; + goto out; + } attr = attr_b; le = le_b; @@ -1740,13 +1742,15 @@ ins_ext: ok: run_truncate_around(run, vcn); out: - if (new_valid > data_size) - new_valid = data_size; + if (attr_b) { + if (new_valid > data_size) + new_valid = data_size; - valid_size = le64_to_cpu(attr_b->nres.valid_size); - if (new_valid != valid_size) { - attr_b->nres.valid_size = cpu_to_le64(valid_size); - mi_b->dirty = true; + valid_size = le64_to_cpu(attr_b->nres.valid_size); + if (new_valid != valid_size) { + attr_b->nres.valid_size = cpu_to_le64(valid_size); + mi_b->dirty = true; + } } return err; diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index 0c6a68e71e7d..723e49ec83ce 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -127,12 +127,13 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, { size_t off; u16 sz; + const unsigned le_min_size = le_size(0); if (!le) { le = ni->attr_list.le; } else { sz = le16_to_cpu(le->size); - if (sz < sizeof(struct ATTR_LIST_ENTRY)) { + if (sz < le_min_size) { /* Impossible 'cause we should not return such le. */ return NULL; } @@ -141,7 +142,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, /* Check boundary. */ off = PtrOffset(ni->attr_list.le, le); - if (off + sizeof(struct ATTR_LIST_ENTRY) > ni->attr_list.size) { + if (off + le_min_size > ni->attr_list.size) { /* The regular end of list. */ return NULL; } @@ -149,8 +150,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, sz = le16_to_cpu(le->size); /* Check le for errors. */ - if (sz < sizeof(struct ATTR_LIST_ENTRY) || - off + sz > ni->attr_list.size || + if (sz < le_min_size || off + sz > ni->attr_list.size || sz < le->name_off + le->name_len * sizeof(short)) { return NULL; } diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index d4d9f4ffb6d9..72cdfa8727d3 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -309,11 +309,31 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, return 0; } - /* NTFS: symlinks are "dir + reparse" or "file + reparse" */ - if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) - dt_type = DT_LNK; - else - dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + /* + * NTFS: symlinks are "dir + reparse" or "file + reparse" + * Unfortunately reparse attribute is used for many purposes (several dozens). + * It is not possible here to know is this name symlink or not. + * To get exactly the type of name we should to open inode (read mft). + * getattr for opened file (fstat) correctly returns symlink. + */ + dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + + /* + * It is not reliable to detect the type of name using duplicated information + * stored in parent directory. + * The only correct way to get the type of name - read MFT record and find ATTR_STD. + * The code below is not good idea. + * It does additional locks/reads just to get the type of name. + * Should we use additional mount option to enable branch below? + */ + if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) && + ino != ni->mi.rno) { + struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL); + if (!IS_ERR_OR_NULL(inode)) { + dt_type = fs_umode_to_dtype(inode->i_mode); + iput(inode); + } + } return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); } @@ -495,11 +515,9 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, struct INDEX_HDR *hdr; const struct ATTR_FILE_NAME *fname; u32 e_size, off, end; - u64 vbo = 0; size_t drs = 0, fles = 0, bit = 0; - loff_t i_size = ni->vfs_inode.i_size; struct indx_node *node = NULL; - u8 index_bits = ni->dir.index_bits; + size_t max_indx = ni->vfs_inode.i_size >> ni->dir.index_bits; if (is_empty) *is_empty = true; @@ -543,7 +561,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, fles += 1; } - if (vbo >= i_size) + if (bit >= max_indx) goto out; err = indx_used_bit(&ni->dir, ni, &bit); @@ -553,8 +571,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, if (bit == MINUS_ONE_T) goto out; - vbo = (u64)bit << index_bits; - if (vbo >= i_size) + if (bit >= max_indx) goto out; err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits, @@ -564,7 +581,6 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, hdr = &node->index->ihdr; bit += 1; - vbo = (u64)bit << ni->dir.idx2vbn_bits; } out: diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index f31c0389a2e7..14efe46df91e 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1110,6 +1110,8 @@ out: iocb->ki_pos += written; if (iocb->ki_pos > ni->i_valid) ni->i_valid = iocb->ki_pos; + if (iocb->ki_pos > i_size) + i_size_write(inode, iocb->ki_pos); return written; } diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index bb7e33c24073..d26026090024 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -102,7 +102,7 @@ void ni_clear(struct ntfs_inode *ni) { struct rb_node *node; - if (!ni->vfs_inode.i_nlink && is_rec_inuse(ni->mi.mrec)) + if (!ni->vfs_inode.i_nlink && ni->mi.mrec && is_rec_inuse(ni->mi.mrec)) ni_delete_all(ni); al_destroy(ni); @@ -3255,6 +3255,9 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) return 0; } + if (!ni->mi.mrec) + goto out; + if (is_rec_inuse(ni->mi.mrec) && !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) { bool modified = false; diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 710cb5aa5a65..d53ef128fa73 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -465,7 +465,7 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) { const struct RESTART_AREA *ra; u16 cl, fl, ul; - u32 off, l_size, file_dat_bits, file_size_round; + u32 off, l_size, seq_bits; u16 ro = le16_to_cpu(rhdr->ra_off); u32 sys_page = le32_to_cpu(rhdr->sys_page_size); @@ -511,13 +511,15 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) /* Make sure the sequence number bits match the log file size. */ l_size = le64_to_cpu(ra->l_size); - file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits); - file_size_round = 1u << (file_dat_bits + 3); - if (file_size_round != l_size && - (file_size_round < l_size || (file_size_round / 2) > l_size)) { - return false; + seq_bits = sizeof(u64) * 8 + 3; + while (l_size) { + l_size >>= 1; + seq_bits -= 1; } + if (seq_bits != ra->seq_num_bits) + return false; + /* The log page data offset and record header length must be quad-aligned. */ if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) || !IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8)) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 873b1434a998..4c2d079b3d49 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -976,6 +976,30 @@ static inline __le32 security_hash(const void *sd, size_t bytes) return cpu_to_le32(hash); } +/* + * simple wrapper for sb_bread_unmovable. + */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) +{ + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct buffer_head *bh; + + if (unlikely(block >= sbi->volume.blocks)) { + /* prevent generic message "attempt to access beyond end of device" */ + ntfs_err(sb, "try to read out of volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; + } + + bh = sb_bread_unmovable(sb, block); + if (bh) + return bh; + + ntfs_err(sb, "failed to read volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; +} + int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer) { struct block_device *bdev = sb->s_bdev; @@ -1650,6 +1674,7 @@ struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir) out: if (err) { + make_bad_inode(inode); iput(inode); ni = ERR_PTR(err); } @@ -1842,10 +1867,12 @@ int ntfs_security_init(struct ntfs_sb_info *sbi) goto out; } - root_sdh = resident_data_ex(attr, sizeof(struct INDEX_ROOT)); - if (root_sdh->type != ATTR_ZERO || + if(!(root_sdh = resident_data_ex(attr, sizeof(struct INDEX_ROOT))) || + root_sdh->type != ATTR_ZERO || root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH || - offsetof(struct INDEX_ROOT, ihdr) + root_sdh->ihdr.used > attr->res.data_size) { + offsetof(struct INDEX_ROOT, ihdr) + + le32_to_cpu(root_sdh->ihdr.used) > + le32_to_cpu(attr->res.data_size)) { err = -EINVAL; goto out; } @@ -1861,10 +1888,12 @@ int ntfs_security_init(struct ntfs_sb_info *sbi) goto out; } - root_sii = resident_data_ex(attr, sizeof(struct INDEX_ROOT)); - if (root_sii->type != ATTR_ZERO || + if(!(root_sii = resident_data_ex(attr, sizeof(struct INDEX_ROOT))) || + root_sii->type != ATTR_ZERO || root_sii->rule != NTFS_COLLATION_TYPE_UINT || - offsetof(struct INDEX_ROOT, ihdr) + root_sii->ihdr.used > attr->res.data_size) { + offsetof(struct INDEX_ROOT, ihdr) + + le32_to_cpu(root_sii->ihdr.used) > + le32_to_cpu(attr->res.data_size)) { err = -EINVAL; goto out; } diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index b89a33f5761e..eee01db6e0cc 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -998,6 +998,7 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le = NULL; struct ATTRIB *a; const struct INDEX_NAMES *in = &s_index_names[indx->type]; + struct INDEX_ROOT *root = NULL; a = ni_find_attr(ni, NULL, &le, ATTR_ROOT, in->name, in->name_len, NULL, mi); @@ -1007,7 +1008,15 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, if (attr) *attr = a; - return resident_data_ex(a, sizeof(struct INDEX_ROOT)); + root = resident_data_ex(a, sizeof(struct INDEX_ROOT)); + + /* length check */ + if (root && offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root->ihdr.used) > + le32_to_cpu(a->res.data_size)) { + return NULL; + } + + return root; } static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni, @@ -1097,7 +1106,8 @@ ok: } /* check for index header length */ - if (offsetof(struct INDEX_BUFFER, ihdr) + ib->ihdr.used > bytes) { + if (offsetof(struct INDEX_BUFFER, ihdr) + le32_to_cpu(ib->ihdr.used) > + bytes) { err = -EINVAL; goto out; } diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index dc937089a464..42dd9fdaf415 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -402,7 +402,6 @@ end_enum: goto out; if (!is_match && name) { - /* Reuse rec as buffer for ascii name. */ err = -ENOENT; goto out; } @@ -417,6 +416,7 @@ end_enum: if (names != le16_to_cpu(rec->hard_links)) { /* Correct minor error on the fly. Do not mark inode as dirty. */ + ntfs_inode_warn(inode, "Correct links count -> %u.", names); rec->hard_links = cpu_to_le16(names); ni->mi.dirty = true; } diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 0f38d558169a..ba26a465b309 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -517,12 +517,10 @@ struct ATTR_LIST_ENTRY { __le64 vcn; // 0x08: Starting VCN of this attribute. struct MFT_REF ref; // 0x10: MFT record number with attribute. __le16 id; // 0x18: struct ATTRIB ID. - __le16 name[3]; // 0x1A: Just to align. To get real name can use bNameOffset. + __le16 name[]; // 0x1A: Just to align. To get real name can use name_off. }; // sizeof(0x20) -static_assert(sizeof(struct ATTR_LIST_ENTRY) == 0x20); - static inline u32 le_size(u8 name_len) { return ALIGN(offsetof(struct ATTR_LIST_ENTRY, name) + diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 8c9abaf139e6..0f9bec29f2b7 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -467,7 +467,7 @@ bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn, int al_update(struct ntfs_inode *ni, int sync); static inline size_t al_aligned(size_t size) { - return (size + 1023) & ~(size_t)1023; + return size_add(size, 1023) & ~(size_t)1023; } /* Globals from bitfunc.c */ @@ -580,6 +580,7 @@ bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes); int log_replay(struct ntfs_inode *ni, bool *initialized); /* Globals from fsntfs.c */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block); bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes); int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes, bool simple); @@ -1012,19 +1013,6 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size) return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; } -static inline struct buffer_head *ntfs_bread(struct super_block *sb, - sector_t block) -{ - struct buffer_head *bh = sb_bread(sb, block); - - if (bh) - return bh; - - ntfs_err(sb, "failed to read volume at offset 0x%llx", - (u64)block << sb->s_blocksize_bits); - return NULL; -} - static inline struct ntfs_inode *ntfs_i(struct inode *inode) { return container_of(inode, struct ntfs_inode, vfs_inode); diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index ba336c7280b8..a8d4ed7bca02 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -226,11 +226,6 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) return NULL; } - if (off + asize < off) { - /* overflow check */ - return NULL; - } - attr = Add2Ptr(attr, asize); off += asize; } @@ -253,8 +248,8 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) if ((t32 & 0xf) || (t32 > 0x100)) return NULL; - /* Check boundary. */ - if (off + asize > used) + /* Check overflow and boundary. */ + if (off + asize < off || off + asize > used) return NULL; /* Check size of attribute. */ @@ -491,8 +486,20 @@ bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi, return false; if (ni && is_attr_indexed(attr)) { - le16_add_cpu(&ni->mi.mrec->hard_links, -1); - ni->mi.dirty = true; + u16 links = le16_to_cpu(ni->mi.mrec->hard_links); + struct ATTR_FILE_NAME *fname = + attr->type != ATTR_NAME ? + NULL : + resident_data_ex(attr, + SIZEOF_ATTRIBUTE_FILENAME); + if (fname && fname->type == FILE_NAME_DOS) { + /* Do not decrease links count deleting DOS name. */ + } else if (!links) { + /* minor error. Not critical. */ + } else { + ni->mi.mrec->hard_links = cpu_to_le16(links - 1); + ni->mi.dirty = true; + } } used -= asize; diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index df15e00c2a3a..d98cf7b382bc 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -217,6 +217,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, if (!ea->name_len) break; + if (ea->name_len > ea_size) + break; + if (buffer) { /* Check if we can use field ea->name */ if (off + ea_size > size) diff --git a/fs/pipe.c b/fs/pipe.c index 42c7ff41c2db..9873a6030df5 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -436,12 +436,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) goto out; } -#ifdef CONFIG_WATCH_QUEUE - if (pipe->watch_queue) { + if (pipe_has_watch_queue(pipe)) { ret = -EXDEV; goto out; } -#endif /* * If it wasn't empty we try to merge new data into @@ -1303,6 +1301,11 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) pipe->tail = tail; pipe->head = head; + if (!pipe_has_watch_queue(pipe)) { + pipe->max_usage = nr_slots; + pipe->nr_accounted = nr_slots; + } + spin_unlock_irq(&pipe->rd_wait.lock); /* This might have made more room for writers */ @@ -1320,10 +1323,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) unsigned int nr_slots, size; long ret = 0; -#ifdef CONFIG_WATCH_QUEUE - if (pipe->watch_queue) + if (pipe_has_watch_queue(pipe)) return -EBUSY; -#endif size = round_pipe_size(arg); nr_slots = size >> PAGE_SHIFT; @@ -1356,8 +1357,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) if (ret < 0) goto out_revert_acct; - pipe->max_usage = nr_slots; - pipe->nr_accounted = nr_slots; return pipe->max_usage * PAGE_SIZE; out_revert_acct: @@ -1375,10 +1374,8 @@ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice) if (file->f_op != &pipefifo_fops || !pipe) return NULL; -#ifdef CONFIG_WATCH_QUEUE - if (for_splice && pipe->watch_queue) + if (for_splice && pipe_has_watch_queue(pipe)) return NULL; -#endif return pipe; } diff --git a/fs/proc/array.c b/fs/proc/array.c index 49283b8103c7..d210b2f8b7ed 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -467,13 +467,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, int permitted; struct mm_struct *mm; unsigned long long start_time; - unsigned long cmin_flt = 0, cmaj_flt = 0; - unsigned long min_flt = 0, maj_flt = 0; - u64 cutime, cstime, utime, stime; - u64 cgtime, gtime; + unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt; + u64 cutime, cstime, cgtime, utime, stime, gtime; unsigned long rsslim = 0; unsigned long flags; int exit_code = task->exit_code; + struct signal_struct *sig = task->signal; + unsigned int seq = 1; state = *get_task_state(task); vsize = eip = esp = 0; @@ -501,12 +501,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, sigemptyset(&sigign); sigemptyset(&sigcatch); - cutime = cstime = utime = stime = 0; - cgtime = gtime = 0; if (lock_task_sighand(task, &flags)) { - struct signal_struct *sig = task->signal; - if (sig->tty) { struct pid *pgrp = tty_get_pgrp(sig->tty); tty_pgrp = pid_nr_ns(pgrp, ns); @@ -517,27 +513,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, num_threads = get_nr_threads(task); collect_sigign_sigcatch(task, &sigign, &sigcatch); - cmin_flt = sig->cmin_flt; - cmaj_flt = sig->cmaj_flt; - cutime = sig->cutime; - cstime = sig->cstime; - cgtime = sig->cgtime; rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); - /* add up live thread stats at the group level */ if (whole) { - struct task_struct *t = task; - do { - min_flt += t->min_flt; - maj_flt += t->maj_flt; - gtime += task_gtime(t); - } while_each_thread(task, t); - - min_flt += sig->min_flt; - maj_flt += sig->maj_flt; - thread_group_cputime_adjusted(task, &utime, &stime); - gtime += sig->gtime; - if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED)) exit_code = sig->group_exit_code; } @@ -551,10 +529,41 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (permitted && (!whole || num_threads < 2)) wchan = !task_is_running(task); - if (!whole) { + + do { + seq++; /* 2 on the 1st/lockless path, otherwise odd */ + flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); + + cmin_flt = sig->cmin_flt; + cmaj_flt = sig->cmaj_flt; + cutime = sig->cutime; + cstime = sig->cstime; + cgtime = sig->cgtime; + + if (whole) { + struct task_struct *t; + + min_flt = sig->min_flt; + maj_flt = sig->maj_flt; + gtime = sig->gtime; + + rcu_read_lock(); + __for_each_thread(sig, t) { + min_flt += t->min_flt; + maj_flt += t->maj_flt; + gtime += task_gtime(t); + } + rcu_read_unlock(); + } + } while (need_seqretry(&sig->stats_lock, seq)); + done_seqretry_irqrestore(&sig->stats_lock, seq, flags); + + if (whole) { + thread_group_cputime_adjusted(task, &utime, &stime); + } else { + task_cputime_adjusted(task, &utime, &stime); min_flt = task->min_flt; maj_flt = task->maj_flt; - task_cputime_adjusted(task, &utime, &stime); gtime = task_gtime(task); } diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 9bc5c69fae53..c737c4b4fdd1 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -196,25 +196,21 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = d_inode(dentry)->i_private; struct pstore_record *record = p->record; - int rc = 0; if (!record->psi->erase) return -EPERM; /* Make sure we can't race while removing this file. */ - mutex_lock(&records_list_lock); - if (!list_empty(&p->list)) - list_del_init(&p->list); - else - rc = -ENOENT; - p->dentry = NULL; - mutex_unlock(&records_list_lock); - if (rc) - return rc; + scoped_guard(mutex, &records_list_lock) { + if (!list_empty(&p->list)) + list_del_init(&p->list); + else + return -ENOENT; + p->dentry = NULL; + } - mutex_lock(&record->psi->read_mutex); - record->psi->erase(record); - mutex_unlock(&record->psi->read_mutex); + scoped_guard(mutex, &record->psi->read_mutex) + record->psi->erase(record); return simple_unlink(dir, dentry); } @@ -306,19 +302,16 @@ static struct dentry *psinfo_lock_root(void) { struct dentry *root; - mutex_lock(&pstore_sb_lock); + guard(mutex)(&pstore_sb_lock); /* * Having no backend is fine -- no records appear. * Not being mounted is fine -- nothing to do. */ - if (!psinfo || !pstore_sb) { - mutex_unlock(&pstore_sb_lock); + if (!psinfo || !pstore_sb) return NULL; - } root = pstore_sb->s_root; inode_lock(d_inode(root)); - mutex_unlock(&pstore_sb_lock); return root; } @@ -327,29 +320,25 @@ int pstore_put_backend_records(struct pstore_info *psi) { struct pstore_private *pos, *tmp; struct dentry *root; - int rc = 0; root = psinfo_lock_root(); if (!root) return 0; - mutex_lock(&records_list_lock); - list_for_each_entry_safe(pos, tmp, &records_list, list) { - if (pos->record->psi == psi) { - list_del_init(&pos->list); - rc = simple_unlink(d_inode(root), pos->dentry); - if (WARN_ON(rc)) - break; - d_drop(pos->dentry); - dput(pos->dentry); - pos->dentry = NULL; + scoped_guard(mutex, &records_list_lock) { + list_for_each_entry_safe(pos, tmp, &records_list, list) { + if (pos->record->psi == psi) { + list_del_init(&pos->list); + d_invalidate(pos->dentry); + simple_unlink(d_inode(root), pos->dentry); + pos->dentry = NULL; + } } } - mutex_unlock(&records_list_lock); inode_unlock(d_inode(root)); - return rc; + return 0; } /* @@ -369,20 +358,20 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) if (WARN_ON(!inode_is_locked(d_inode(root)))) return -EINVAL; - rc = -EEXIST; + guard(mutex)(&records_list_lock); + /* Skip records that are already present in the filesystem. */ - mutex_lock(&records_list_lock); list_for_each_entry(pos, &records_list, list) { if (pos->record->type == record->type && pos->record->id == record->id && pos->record->psi == record->psi) - goto fail; + return -EEXIST; } rc = -ENOMEM; inode = pstore_get_inode(root->d_sb); if (!inode) - goto fail; + return -ENOMEM; inode->i_mode = S_IFREG | 0444; inode->i_fop = &pstore_file_operations; scnprintf(name, sizeof(name), "%s-%s-%llu%s", @@ -409,7 +398,6 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) d_add(dentry, inode); list_add(&private->list, &records_list); - mutex_unlock(&records_list_lock); return 0; @@ -417,8 +405,6 @@ fail_private: free_pstore_private(private); fail_inode: iput(inode); -fail: - mutex_unlock(&records_list_lock); return rc; } @@ -464,9 +450,8 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) return -ENOMEM; - mutex_lock(&pstore_sb_lock); - pstore_sb = sb; - mutex_unlock(&pstore_sb_lock); + scoped_guard(mutex, &pstore_sb_lock) + pstore_sb = sb; pstore_get_records(0); @@ -481,17 +466,14 @@ static struct dentry *pstore_mount(struct file_system_type *fs_type, static void pstore_kill_sb(struct super_block *sb) { - mutex_lock(&pstore_sb_lock); + guard(mutex)(&pstore_sb_lock); WARN_ON(pstore_sb && pstore_sb != sb); kill_litter_super(sb); pstore_sb = NULL; - mutex_lock(&records_list_lock); + guard(mutex)(&records_list_lock); INIT_LIST_HEAD(&records_list); - mutex_unlock(&records_list_lock); - - mutex_unlock(&pstore_sb_lock); } static struct file_system_type pstore_fs_type = { diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 02711caaa65e..7ecc914849ad 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -581,6 +581,7 @@ static int ramoops_init_przs(const char *name, } zone_sz = mem_sz / *cnt; + zone_sz = ALIGN_DOWN(zone_sz, 2); if (!zone_sz) { dev_err(dev, "%s zone size == 0\n", name); goto fail; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index b0cf3869d3bf..b67557647d61 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -399,15 +399,17 @@ int dquot_mark_dquot_dirty(struct dquot *dquot) EXPORT_SYMBOL(dquot_mark_dquot_dirty); /* Dirtify all the dquots - this can block when journalling */ -static inline int mark_all_dquot_dirty(struct dquot * const *dquot) +static inline int mark_all_dquot_dirty(struct dquot __rcu * const *dquots) { int ret, err, cnt; + struct dquot *dquot; ret = err = 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquot[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (dquot) /* Even in case of error we have to continue */ - ret = mark_dquot_dirty(dquot[cnt]); + ret = mark_dquot_dirty(dquot); if (!err) err = ret; } @@ -1004,14 +1006,15 @@ out: } EXPORT_SYMBOL(dqget); -static inline struct dquot **i_dquot(struct inode *inode) +static inline struct dquot __rcu **i_dquot(struct inode *inode) { - return inode->i_sb->s_op->get_dquots(inode); + /* Force __rcu for now until filesystems are fixed */ + return (struct dquot __rcu **)inode->i_sb->s_op->get_dquots(inode); } static int dqinit_needed(struct inode *inode, int type) { - struct dquot * const *dquots; + struct dquot __rcu * const *dquots; int cnt; if (IS_NOQUOTA(inode)) @@ -1084,59 +1087,7 @@ out: return err; } -/* - * Remove references to dquots from inode and add dquot to list for freeing - * if we have the last reference to dquot - */ -static void remove_inode_dquot_ref(struct inode *inode, int type, - struct list_head *tofree_head) -{ - struct dquot **dquots = i_dquot(inode); - struct dquot *dquot = dquots[type]; - - if (!dquot) - return; - - dquots[type] = NULL; - if (list_empty(&dquot->dq_free)) { - /* - * The inode still has reference to dquot so it can't be in the - * free list - */ - spin_lock(&dq_list_lock); - list_add(&dquot->dq_free, tofree_head); - spin_unlock(&dq_list_lock); - } else { - /* - * Dquot is already in a list to put so we won't drop the last - * reference here. - */ - dqput(dquot); - } -} - -/* - * Free list of dquots - * Dquots are removed from inodes and no new references can be got so we are - * the only ones holding reference - */ -static void put_dquot_list(struct list_head *tofree_head) -{ - struct list_head *act_head; - struct dquot *dquot; - - act_head = tofree_head->next; - while (act_head != tofree_head) { - dquot = list_entry(act_head, struct dquot, dq_free); - act_head = act_head->next; - /* Remove dquot from the list so we won't have problems... */ - list_del_init(&dquot->dq_free); - dqput(dquot); - } -} - -static void remove_dquot_ref(struct super_block *sb, int type, - struct list_head *tofree_head) +static void remove_dquot_ref(struct super_block *sb, int type) { struct inode *inode; #ifdef CONFIG_QUOTA_DEBUG @@ -1153,11 +1104,18 @@ static void remove_dquot_ref(struct super_block *sb, int type, */ spin_lock(&dq_data_lock); if (!IS_NOQUOTA(inode)) { + struct dquot __rcu **dquots = i_dquot(inode); + struct dquot *dquot = srcu_dereference_check( + dquots[type], &dquot_srcu, + lockdep_is_held(&dq_data_lock)); + #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; #endif - remove_inode_dquot_ref(inode, type, tofree_head); + rcu_assign_pointer(dquots[type], NULL); + if (dquot) + dqput(dquot); } spin_unlock(&dq_data_lock); } @@ -1174,13 +1132,8 @@ static void remove_dquot_ref(struct super_block *sb, int type, /* Gather all references from inodes and drop them */ static void drop_dquot_ref(struct super_block *sb, int type) { - LIST_HEAD(tofree_head); - - if (sb->dq_op) { - remove_dquot_ref(sb, type, &tofree_head); - synchronize_srcu(&dquot_srcu); - put_dquot_list(&tofree_head); - } + if (sb->dq_op) + remove_dquot_ref(sb, type); } static inline @@ -1513,7 +1466,8 @@ static int inode_quota_active(const struct inode *inode) static int __dquot_initialize(struct inode *inode, int type) { int cnt, init_needed = 0; - struct dquot **dquots, *got[MAXQUOTAS] = {}; + struct dquot __rcu **dquots; + struct dquot *got[MAXQUOTAS] = {}; struct super_block *sb = inode->i_sb; qsize_t rsv; int ret = 0; @@ -1588,7 +1542,7 @@ static int __dquot_initialize(struct inode *inode, int type) if (!got[cnt]) continue; if (!dquots[cnt]) { - dquots[cnt] = got[cnt]; + rcu_assign_pointer(dquots[cnt], got[cnt]); got[cnt] = NULL; /* * Make quota reservation system happy if someone @@ -1596,12 +1550,16 @@ static int __dquot_initialize(struct inode *inode, int type) */ rsv = inode_get_rsv_space(inode); if (unlikely(rsv)) { + struct dquot *dquot = srcu_dereference_check( + dquots[cnt], &dquot_srcu, + lockdep_is_held(&dq_data_lock)); + spin_lock(&inode->i_lock); /* Get reservation again under proper lock */ rsv = __inode_get_rsv_space(inode); - spin_lock(&dquots[cnt]->dq_dqb_lock); - dquots[cnt]->dq_dqb.dqb_rsvspace += rsv; - spin_unlock(&dquots[cnt]->dq_dqb_lock); + spin_lock(&dquot->dq_dqb_lock); + dquot->dq_dqb.dqb_rsvspace += rsv; + spin_unlock(&dquot->dq_dqb_lock); spin_unlock(&inode->i_lock); } } @@ -1623,7 +1581,7 @@ EXPORT_SYMBOL(dquot_initialize); bool dquot_initialize_needed(struct inode *inode) { - struct dquot **dquots; + struct dquot __rcu **dquots; int i; if (!inode_quota_active(inode)) @@ -1648,13 +1606,14 @@ EXPORT_SYMBOL(dquot_initialize_needed); static void __dquot_drop(struct inode *inode) { int cnt; - struct dquot **dquots = i_dquot(inode); + struct dquot __rcu **dquots = i_dquot(inode); struct dquot *put[MAXQUOTAS]; spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - put[cnt] = dquots[cnt]; - dquots[cnt] = NULL; + put[cnt] = srcu_dereference_check(dquots[cnt], &dquot_srcu, + lockdep_is_held(&dq_data_lock)); + rcu_assign_pointer(dquots[cnt], NULL); } spin_unlock(&dq_data_lock); dqput_all(put); @@ -1662,7 +1621,7 @@ static void __dquot_drop(struct inode *inode) void dquot_drop(struct inode *inode) { - struct dquot * const *dquots; + struct dquot __rcu * const *dquots; int cnt; if (IS_NOQUOTA(inode)) @@ -1735,7 +1694,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; int reserve = flags & DQUOT_SPACE_RESERVE; - struct dquot **dquots; + struct dquot __rcu **dquots; + struct dquot *dquot; if (!inode_quota_active(inode)) { if (reserve) { @@ -1755,27 +1715,26 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; if (reserve) { - ret = dquot_add_space(dquots[cnt], 0, number, flags, - &warn[cnt]); + ret = dquot_add_space(dquot, 0, number, flags, &warn[cnt]); } else { - ret = dquot_add_space(dquots[cnt], number, 0, flags, - &warn[cnt]); + ret = dquot_add_space(dquot, number, 0, flags, &warn[cnt]); } if (ret) { /* Back out changes we already did */ for (cnt--; cnt >= 0; cnt--) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - spin_lock(&dquots[cnt]->dq_dqb_lock); + spin_lock(&dquot->dq_dqb_lock); if (reserve) - dquot_free_reserved_space(dquots[cnt], - number); + dquot_free_reserved_space(dquot, number); else - dquot_decr_space(dquots[cnt], number); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + dquot_decr_space(dquot, number); + spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); goto out_flush_warn; @@ -1805,7 +1764,8 @@ int dquot_alloc_inode(struct inode *inode) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; - struct dquot * const *dquots; + struct dquot __rcu * const *dquots; + struct dquot *dquot; if (!inode_quota_active(inode)) return 0; @@ -1816,17 +1776,19 @@ int dquot_alloc_inode(struct inode *inode) index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - ret = dquot_add_inodes(dquots[cnt], 1, &warn[cnt]); + ret = dquot_add_inodes(dquot, 1, &warn[cnt]); if (ret) { for (cnt--; cnt >= 0; cnt--) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; /* Back out changes we already did */ - spin_lock(&dquots[cnt]->dq_dqb_lock); - dquot_decr_inodes(dquots[cnt], 1); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + spin_lock(&dquot->dq_dqb_lock); + dquot_decr_inodes(dquot, 1); + spin_unlock(&dquot->dq_dqb_lock); } goto warn_put_all; } @@ -1847,7 +1809,8 @@ EXPORT_SYMBOL(dquot_alloc_inode); */ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { - struct dquot **dquots; + struct dquot __rcu **dquots; + struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { @@ -1863,9 +1826,8 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquots[cnt]) { - struct dquot *dquot = dquots[cnt]; - + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number)) number = dquot->dq_dqb.dqb_rsvspace; @@ -1889,7 +1851,8 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); */ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { - struct dquot **dquots; + struct dquot __rcu **dquots; + struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { @@ -1905,9 +1868,8 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquots[cnt]) { - struct dquot *dquot = dquots[cnt]; - + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number)) number = dquot->dq_dqb.dqb_curspace; @@ -1933,7 +1895,8 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; - struct dquot **dquots; + struct dquot __rcu **dquots; + struct dquot *dquot; int reserve = flags & DQUOT_SPACE_RESERVE, index; if (!inode_quota_active(inode)) { @@ -1954,17 +1917,18 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - spin_lock(&dquots[cnt]->dq_dqb_lock); - wtype = info_bdq_free(dquots[cnt], number); + spin_lock(&dquot->dq_dqb_lock); + wtype = info_bdq_free(dquot, number); if (wtype != QUOTA_NL_NOWARN) - prepare_warning(&warn[cnt], dquots[cnt], wtype); + prepare_warning(&warn[cnt], dquot, wtype); if (reserve) - dquot_free_reserved_space(dquots[cnt], number); + dquot_free_reserved_space(dquot, number); else - dquot_decr_space(dquots[cnt], number); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + dquot_decr_space(dquot, number); + spin_unlock(&dquot->dq_dqb_lock); } if (reserve) *inode_reserved_space(inode) -= number; @@ -1988,7 +1952,8 @@ void dquot_free_inode(struct inode *inode) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; - struct dquot * const *dquots; + struct dquot __rcu * const *dquots; + struct dquot *dquot; int index; if (!inode_quota_active(inode)) @@ -1999,16 +1964,16 @@ void dquot_free_inode(struct inode *inode) spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; - warn[cnt].w_type = QUOTA_NL_NOWARN; - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - spin_lock(&dquots[cnt]->dq_dqb_lock); - wtype = info_idq_free(dquots[cnt], 1); + spin_lock(&dquot->dq_dqb_lock); + wtype = info_idq_free(dquot, 1); if (wtype != QUOTA_NL_NOWARN) - prepare_warning(&warn[cnt], dquots[cnt], wtype); - dquot_decr_inodes(dquots[cnt], 1); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + prepare_warning(&warn[cnt], dquot, wtype); + dquot_decr_inodes(dquot, 1); + spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); @@ -2034,8 +1999,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) qsize_t cur_space; qsize_t rsv_space = 0; qsize_t inode_usage = 1; + struct dquot __rcu **dquots; struct dquot *transfer_from[MAXQUOTAS] = {}; - int cnt, ret = 0; + int cnt, index, ret = 0; char is_valid[MAXQUOTAS] = {}; struct dquot_warn warn_to[MAXQUOTAS]; struct dquot_warn warn_from_inodes[MAXQUOTAS]; @@ -2066,6 +2032,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) } cur_space = __inode_get_bytes(inode); rsv_space = __inode_get_rsv_space(inode); + dquots = i_dquot(inode); /* * Build the transfer_from list, check limits, and update usage in * the target structures. @@ -2080,7 +2047,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) if (!sb_has_quota_active(inode->i_sb, cnt)) continue; is_valid[cnt] = 1; - transfer_from[cnt] = i_dquot(inode)[cnt]; + transfer_from[cnt] = srcu_dereference_check(dquots[cnt], + &dquot_srcu, lockdep_is_held(&dq_data_lock)); ret = dquot_add_inodes(transfer_to[cnt], inode_usage, &warn_to[cnt]); if (ret) @@ -2119,13 +2087,21 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) rsv_space); spin_unlock(&transfer_from[cnt]->dq_dqb_lock); } - i_dquot(inode)[cnt] = transfer_to[cnt]; + rcu_assign_pointer(dquots[cnt], transfer_to[cnt]); } spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); - mark_all_dquot_dirty(transfer_from); - mark_all_dquot_dirty(transfer_to); + /* + * These arrays are local and we hold dquot references so we don't need + * the srcu protection but still take dquot_srcu to avoid warning in + * mark_all_dquot_dirty(). + */ + index = srcu_read_lock(&dquot_srcu); + mark_all_dquot_dirty((struct dquot __rcu **)transfer_from); + mark_all_dquot_dirty((struct dquot __rcu **)transfer_to); + srcu_read_unlock(&dquot_srcu, index); + flush_warnings(warn_to); flush_warnings(warn_from_inodes); flush_warnings(warn_from_space); diff --git a/fs/select.c b/fs/select.c index 0ee55af1a55c..d4d881d439dc 100644 --- a/fs/select.c +++ b/fs/select.c @@ -476,7 +476,7 @@ static inline void wait_key_set(poll_table *wait, unsigned long in, wait->_key |= POLLOUT_SET; } -static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) +static noinline_for_stack int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) { ktime_t expire, *to = NULL; struct poll_wqueues table; diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 5a132c1e6f6c..86fe433b1d32 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -149,7 +149,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, return -EOPNOTSUPP; ses = tcon->ses; - server = ses->server; + server = cifs_pick_channel(ses); cfids = tcon->cfids; if (!server->ops->new_lease_key) @@ -218,7 +218,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, .tcon = tcon, .path = path, .create_options = cifs_create_options(cifs_sb, CREATE_NOT_FILE), - .desired_access = FILE_READ_DATA | FILE_READ_ATTRIBUTES, + .desired_access = FILE_READ_DATA | FILE_READ_ATTRIBUTES | + FILE_READ_EA, .disposition = FILE_OPEN, .fid = pfid, }; @@ -268,10 +269,12 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) goto oshr_free; - smb2_parse_contexts(server, o_rsp, + rc = smb2_parse_contexts(server, rsp_iov, &oparms.fid->epoch, - oparms.fid->lease_key, &oplock, - NULL, NULL); + oparms.fid->lease_key, + &oplock, NULL, NULL); + if (rc) + goto oshr_free; if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) goto oshr_free; qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index d0ac2648c0d6..d3d4cf6321fd 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -444,7 +444,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); UniStrupr(user); } else { - memset(user, '\0', 2); + *(u16 *)user = 0; } rc = crypto_shash_update(ses->server->secmech.hmacmd5, diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 7f1aea4c11b9..58bb54994e22 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -86,7 +86,7 @@ #define SMB_INTERFACE_POLL_INTERVAL 600 /* maximum number of PDUs in one compound */ -#define MAX_COMPOUND 5 +#define MAX_COMPOUND 7 /* * Default number of credits to keep available for SMB3. diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index f4818599c00a..4d5302b58b53 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -209,7 +209,7 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c switch (match_token(value, cifs_secflavor_tokens, args)) { case Opt_sec_krb5p: - cifs_errorf(fc, "sec=krb5p is not supported!\n"); + cifs_errorf(fc, "sec=krb5p is not supported. Use sec=krb5,seal instead\n"); return 1; case Opt_sec_krb5i: ctx->sign = true; diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 2d75ba5aaa8a..5990bdbae598 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -304,14 +304,16 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, } static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr, - SEARCH_ID_FULL_DIR_INFO *info, + const void *info, struct cifs_sb_info *cifs_sb) { + const FILE_FULL_DIRECTORY_INFO *di = info; + __dir_info_to_fattr(fattr, info); - /* See MS-FSCC 2.4.19 FileIdFullDirectoryInformation */ + /* See MS-FSCC 2.4.14, 2.4.19 */ if (fattr->cf_cifsattrs & ATTR_REPARSE) - fattr->cf_cifstag = le32_to_cpu(info->EaSize); + fattr->cf_cifstag = le32_to_cpu(di->EaSize); cifs_fill_common_info(fattr, cifs_sb); } @@ -425,7 +427,7 @@ ffirst_retry: } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; } else /* not srvinos - BB fixme add check for backlevel? */ { - cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO; + cifsFile->srch_inf.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO; } search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME; @@ -1019,10 +1021,9 @@ static int cifs_filldir(char *find_entry, struct file *file, (FIND_FILE_STANDARD_INFO *)find_entry, cifs_sb); break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: case SMB_FIND_FILE_ID_FULL_DIR_INFO: - cifs_fulldir_info_to_fattr(&fattr, - (SEARCH_ID_FULL_DIR_INFO *)find_entry, - cifs_sb); + cifs_fulldir_info_to_fattr(&fattr, find_entry, cifs_sb); break; default: cifs_dir_info_to_fattr(&fattr, diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 634035bcb934..b8e14bcd2c68 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -248,6 +248,8 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) &iface->sockaddr, rc); kref_put(&iface->refcount, release_iface); + /* failure to add chan should increase weight */ + iface->weight_fulfilled++; continue; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 5a157000bdfe..34d1262004df 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -613,7 +613,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, goto out; } - while (bytes_left >= sizeof(*p)) { + while (bytes_left >= (ssize_t)sizeof(*p)) { memset(&tmp_iface, 0, sizeof(tmp_iface)); tmp_iface.speed = le64_to_cpu(p->LinkSpeed); tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index e65f998ea4cf..4c1231496a72 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2145,17 +2145,18 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info, posix->nlink, posix->mode, posix->reparse_tag); } -void -smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, __u8 *oplock, - struct smb2_file_all_info *buf, - struct create_posix_rsp *posix) +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix) { - char *data_offset; + struct smb2_create_rsp *rsp = rsp_iov->iov_base; struct create_context *cc; - unsigned int next; - unsigned int remaining; + size_t rem, off, len; + size_t doff, dlen; + size_t noff, nlen; char *name; static const char smb3_create_tag_posix[] = { 0x93, 0xAD, 0x25, 0x50, 0x9C, @@ -2164,45 +2165,63 @@ smb2_parse_contexts(struct TCP_Server_Info *server, }; *oplock = 0; - data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset); - remaining = le32_to_cpu(rsp->CreateContextsLength); - cc = (struct create_context *)data_offset; + + off = le32_to_cpu(rsp->CreateContextsOffset); + rem = le32_to_cpu(rsp->CreateContextsLength); + if (check_add_overflow(off, rem, &len) || len > rsp_iov->iov_len) + return -EINVAL; + cc = (struct create_context *)((u8 *)rsp + off); /* Initialize inode number to 0 in case no valid data in qfid context */ if (buf) buf->IndexNumber = 0; - while (remaining >= sizeof(struct create_context)) { - name = le16_to_cpu(cc->NameOffset) + (char *)cc; - if (le16_to_cpu(cc->NameLength) == 4 && - strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4) == 0) - *oplock = server->ops->parse_lease_buf(cc, epoch, - lease_key); - else if (buf && (le16_to_cpu(cc->NameLength) == 4) && - strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0) - parse_query_id_ctxt(cc, buf); - else if ((le16_to_cpu(cc->NameLength) == 16)) { - if (posix && - memcmp(name, smb3_create_tag_posix, 16) == 0) - parse_posix_ctxt(cc, buf, posix); - } - /* else { - cifs_dbg(FYI, "Context not matched with len %d\n", - le16_to_cpu(cc->NameLength)); - cifs_dump_mem("Cctxt name: ", name, 4); - } */ + while (rem >= sizeof(*cc)) { + doff = le16_to_cpu(cc->DataOffset); + dlen = le32_to_cpu(cc->DataLength); + if (check_add_overflow(doff, dlen, &len) || len > rem) + return -EINVAL; - next = le32_to_cpu(cc->Next); - if (!next) + noff = le16_to_cpu(cc->NameOffset); + nlen = le16_to_cpu(cc->NameLength); + if (noff + nlen > doff) + return -EINVAL; + + name = (char *)cc + noff; + switch (nlen) { + case 4: + if (!strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) { + *oplock = server->ops->parse_lease_buf(cc, epoch, + lease_key); + } else if (buf && + !strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4)) { + parse_query_id_ctxt(cc, buf); + } break; - remaining -= next; - cc = (struct create_context *)((char *)cc + next); + case 16: + if (posix && !memcmp(name, smb3_create_tag_posix, 16)) + parse_posix_ctxt(cc, buf, posix); + break; + default: + cifs_dbg(FYI, "%s: unhandled context (nlen=%zu dlen=%zu)\n", + __func__, nlen, dlen); + if (IS_ENABLED(CONFIG_CIFS_DEBUG2)) + cifs_dump_mem("context data: ", cc, dlen); + break; + } + + off = le32_to_cpu(cc->Next); + if (!off) + break; + if (check_sub_overflow(rem, off, &rem)) + return -EINVAL; + cc = (struct create_context *)((u8 *)cc + off); } if (rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) *oplock = rsp->OplockLevel; - return; + return 0; } static int @@ -3082,8 +3101,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, } - smb2_parse_contexts(server, rsp, &oparms->fid->epoch, - oparms->fid->lease_key, oplock, buf, posix); + rc = smb2_parse_contexts(server, &rsp_iov, &oparms->fid->epoch, + oparms->fid->lease_key, oplock, buf, posix); creat_exit: SMB2_open_free(&rqst); free_rsp_buf(resp_buftype, rsp); @@ -4991,6 +5010,9 @@ int SMB2_query_directory_init(const unsigned int xid, case SMB_FIND_FILE_POSIX_INFO: req->FileInformationClass = SMB_FIND_FILE_POSIX_INFO; break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: + req->FileInformationClass = FILE_FULL_DIRECTORY_INFORMATION; + break; default: cifs_tcon_dbg(VFS, "info level %u isn't supported\n", info_level); @@ -5060,6 +5082,9 @@ smb2_parse_query_directory(struct cifs_tcon *tcon, /* note that posix payload are variable size */ info_buf_size = sizeof(struct smb2_posix_info); break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: + info_buf_size = sizeof(FILE_FULL_DIRECTORY_INFO); + break; default: cifs_tcon_dbg(VFS, "info level %u isn't supported\n", srch_inf->info_level); diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index be21b5d26f67..b325fde010ad 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -249,11 +249,13 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, enum securityEnum); -extern void smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, - __u8 *oplock, struct smb2_file_all_info *buf, - struct create_posix_rsp *posix); +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix); + extern int smb3_encryption_required(const struct cifs_tcon *tcon); extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length, struct kvec *iov, unsigned int min_buf_size); diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 8a1dd8407a3a..df44acaec9ae 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -427,10 +427,17 @@ unmask: server->conn_id, server->hostname); } smbd_done: - if (rc < 0 && rc != -EINTR) + /* + * there's hardly any use for the layers above to know the + * actual error code here. All they should do at this point is + * to retry the connection and hope it goes away. + */ + if (rc < 0 && rc != -EINTR && rc != -EAGAIN) { cifs_server_dbg(VFS, "Error %d sending data on socket to server\n", rc); - else if (rc > 0) + rc = -ECONNABORTED; + cifs_signal_cifsd_for_reconnect(server, false); + } else if (rc > 0) rc = 0; out: cifs_in_send_dec(server); @@ -449,8 +456,8 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, if (!(flags & CIFS_TRANSFORM_REQ)) return __smb_send_rqst(server, num_rqst, rqst); - if (num_rqst > MAX_COMPOUND - 1) - return -ENOMEM; + if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1)) + return -EIO; if (!server->ops->init_transform_rq) { cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n"); diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 7977827c6541..09e1e7771592 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -284,6 +284,7 @@ int ksmbd_conn_handler_loop(void *p) goto out; conn->last_active = jiffies; + set_freezable(); while (ksmbd_conn_alive(conn)) { if (try_to_freeze()) continue; diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index b7521e41402e..0ebf91ffa236 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -304,7 +304,8 @@ enum ksmbd_event { KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST, KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE = 15, - KSMBD_EVENT_MAX + __KSMBD_EVENT_MAX, + KSMBD_EVENT_MAX = __KSMBD_EVENT_MAX - 1 }; /* diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index f1831e26adad..1253e9bde34c 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -105,7 +105,7 @@ static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx) lease->is_dir = lctx->is_dir; memcpy(lease->parent_lease_key, lctx->parent_lease_key, SMB2_LEASE_KEY_SIZE); lease->version = lctx->version; - lease->epoch = le16_to_cpu(lctx->epoch); + lease->epoch = le16_to_cpu(lctx->epoch) + 1; INIT_LIST_HEAD(&opinfo->lease_entry); opinfo->o_lease = lease; @@ -546,6 +546,7 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, atomic_read(&ci->sop_count)) == 1) { if (lease->state != SMB2_LEASE_NONE_LE && lease->state == (lctx->req_state & lease->state)) { + lease->epoch++; lease->state |= lctx->req_state; if (lctx->req_state & SMB2_LEASE_WRITE_CACHING_LE) @@ -556,13 +557,17 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, atomic_read(&ci->sop_count)) > 1) { if (lctx->req_state == (SMB2_LEASE_READ_CACHING_LE | - SMB2_LEASE_HANDLE_CACHING_LE)) + SMB2_LEASE_HANDLE_CACHING_LE)) { + lease->epoch++; lease->state = lctx->req_state; + } } if (lctx->req_state && lease->state == - SMB2_LEASE_NONE_LE) + SMB2_LEASE_NONE_LE) { + lease->epoch++; lease_none_upgrade(opinfo, lctx->req_state); + } } read_lock(&ci->m_lock); } @@ -1035,7 +1040,8 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2) SMB2_LEASE_KEY_SIZE); lease2->duration = lease1->duration; lease2->flags = lease1->flags; - lease2->epoch = lease1->epoch++; + lease2->epoch = lease1->epoch; + lease2->version = lease1->version; } static int add_lease_global_list(struct oplock_info *opinfo) @@ -1453,7 +1459,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) memcpy(buf->lcontext.LeaseKey, lease->lease_key, SMB2_LEASE_KEY_SIZE); buf->lcontext.LeaseFlags = lease->flags; - buf->lcontext.Epoch = cpu_to_le16(++lease->epoch); + buf->lcontext.Epoch = cpu_to_le16(lease->epoch); buf->lcontext.LeaseState = lease->state; memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key, SMB2_LEASE_KEY_SIZE); diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 6826b562073e..39fc078284c8 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2321,11 +2321,12 @@ out: * @eabuf: set info command buffer * @buf_len: set info command buffer length * @path: dentry path for get ea + * @get_write: get write access to a mount * * Return: 0 on success, otherwise error */ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, - const struct path *path) + const struct path *path, bool get_write) { struct user_namespace *user_ns = mnt_user_ns(path->mnt); char *attr_name = NULL, *value; @@ -3013,7 +3014,7 @@ int smb2_open(struct ksmbd_work *work) rc = smb2_set_ea(&ea_buf->ea, le32_to_cpu(ea_buf->ccontext.DataLength), - &path); + &path, false); if (rc == -EOPNOTSUPP) rc = 0; else if (rc) @@ -5578,6 +5579,7 @@ static int smb2_rename(struct ksmbd_work *work, if (!file_info->ReplaceIfExists) flags = RENAME_NOREPLACE; + smb_break_all_levII_oplock(work, fp, 0); rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags); out: kfree(new_name); @@ -5755,15 +5757,21 @@ static int set_file_allocation_info(struct ksmbd_work *work, loff_t alloc_blks; struct inode *inode; + struct kstat stat; int rc; if (!(fp->daccess & FILE_WRITE_DATA_LE)) return -EACCES; + rc = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS, + AT_STATX_SYNC_AS_STAT); + if (rc) + return rc; + alloc_blks = (le64_to_cpu(file_alloc_info->AllocationSize) + 511) >> 9; inode = file_inode(fp->filp); - if (alloc_blks > inode->i_blocks) { + if (alloc_blks > stat.blocks) { smb_break_all_levII_oplock(work, fp, 1); rc = vfs_fallocate(fp->filp, FALLOC_FL_KEEP_SIZE, 0, alloc_blks * 512); @@ -5771,7 +5779,7 @@ static int set_file_allocation_info(struct ksmbd_work *work, pr_err("vfs_fallocate is failed : %d\n", rc); return rc; } - } else if (alloc_blks < inode->i_blocks) { + } else if (alloc_blks < stat.blocks) { loff_t size; /* @@ -5990,7 +5998,7 @@ static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp, return -EINVAL; return smb2_set_ea((struct smb2_ea_info *)req->Buffer, - buf_len, &fp->filp->f_path); + buf_len, &fp->filp->f_path, true); } case FILE_POSITION_INFORMATION: { @@ -6169,8 +6177,10 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work) err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer), aux_payload_buf, nbytes); - if (err) + if (err) { + kvfree(aux_payload_buf); goto out; + } kvfree(rpc_resp); } else { err = ksmbd_iov_pin_rsp(work, (void *)rsp, @@ -6380,8 +6390,10 @@ int smb2_read(struct ksmbd_work *work) err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer), aux_payload_buf, nbytes); - if (err) + if (err) { + kvfree(aux_payload_buf); goto out; + } ksmbd_fd_put(work, fp); return 0; diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index b49d47bdafc9..f29bb03f0dc4 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -74,7 +74,7 @@ static int handle_unsupported_event(struct sk_buff *skb, struct genl_info *info) static int handle_generic_event(struct sk_buff *skb, struct genl_info *info); static int ksmbd_ipc_heartbeat_request(void); -static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX] = { +static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX + 1] = { [KSMBD_EVENT_UNSPEC] = { .len = 0, }, @@ -403,7 +403,7 @@ static int handle_generic_event(struct sk_buff *skb, struct genl_info *info) return -EPERM; #endif - if (type >= KSMBD_EVENT_MAX) { + if (type > KSMBD_EVENT_MAX) { WARN_ON(1); return -EINVAL; } diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 66ba57a139d2..4d9782e2fab1 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1226,6 +1226,8 @@ out_cancel: dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); out_inode: + /* Free inode->i_link before inode is marked as bad. */ + fscrypt_free_inode(inode); make_bad_inode(inode); iput(inode); out_fname: diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 10c1779af9c5..f7b1f9ece136 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -261,9 +261,6 @@ static int write_begin_slow(struct address_space *mapping, return err; } } - - SetPageUptodate(page); - ClearPageError(page); } if (PagePrivate(page)) @@ -462,9 +459,6 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, return err; } } - - SetPageUptodate(page); - ClearPageError(page); } err = allocate_budget(c, page, ui, appending); @@ -474,10 +468,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, * If we skipped reading the page because we were going to * write all of it, then it is not up to date. */ - if (skipped_read) { + if (skipped_read) ClearPageChecked(page); - ClearPageUptodate(page); - } /* * Budgeting failed which means it would have to force * write-back but didn't, because we set the @fast flag in the @@ -568,6 +560,9 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, goto out; } + if (len == PAGE_SIZE) + SetPageUptodate(page); + if (!PagePrivate(page)) { attach_page_private(page, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 4b179526913f..12662b169b71 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1458,6 +1458,18 @@ xfs_fs_fill_super( mp->m_super = sb; + /* + * Copy VFS mount flags from the context now that all parameter parsing + * is guaranteed to have been completed by either the old mount API or + * the newer fsopen/fsconfig API. + */ + if (fc->sb_flags & SB_RDONLY) + set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + if (fc->sb_flags & SB_DIRSYNC) + mp->m_features |= XFS_FEAT_DIRSYNC; + if (fc->sb_flags & SB_SYNCHRONOUS) + mp->m_features |= XFS_FEAT_WSYNC; + error = xfs_fs_validate_params(mp); if (error) goto out_free_names; @@ -1915,6 +1927,11 @@ static const struct fs_context_operations xfs_context_ops = { .free = xfs_fs_free, }; +/* + * WARNING: do not initialise any parameters in this function that depend on + * mount option parsing having already been performed as this can be called from + * fsopen() before any parameters have been set. + */ static int xfs_init_fs_context( struct fs_context *fc) { @@ -1947,16 +1964,6 @@ static int xfs_init_fs_context( mp->m_logbsize = -1; mp->m_allocsize_log = 16; /* 64k */ - /* - * Copy binary VFS mount flags we are interested in. - */ - if (fc->sb_flags & SB_RDONLY) - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); - if (fc->sb_flags & SB_DIRSYNC) - mp->m_features |= XFS_FEAT_DIRSYNC; - if (fc->sb_flags & SB_SYNCHRONOUS) - mp->m_features |= XFS_FEAT_WSYNC; - fc->s_fs_info = mp; fc->ops = &xfs_context_ops; diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 63cd50840419..8d5f4a5a74e6 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -349,7 +349,12 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, struct zonefs_inode_info *zi = ZONEFS_I(inode); if (error) { - zonefs_io_error(inode, true); + /* + * For Sync IOs, error recovery is called from + * zonefs_file_dio_write(). + */ + if (!is_sync_kiocb(iocb)) + zonefs_io_error(inode, true); return error; } @@ -577,6 +582,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ret = -EINVAL; goto inode_unlock; } + /* + * Advance the zone write pointer offset. This assumes that the + * IO will succeed, which is OK to do because we do not allow + * partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO + * fails, the error path will correct the write pointer offset. + */ + z->z_wpoffset += count; + zonefs_inode_account_active(inode); mutex_unlock(&zi->i_truncate_mutex); append = sync; } @@ -596,20 +609,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ret = -EBUSY; } - if (zonefs_zone_is_seq(z) && - (ret > 0 || ret == -EIOCBQUEUED)) { - if (ret > 0) - count = ret; - - /* - * Update the zone write pointer offset assuming the write - * operation succeeded. If it did not, the error recovery path - * will correct it. Also do active seq file accounting. - */ - mutex_lock(&zi->i_truncate_mutex); - z->z_wpoffset += count; - zonefs_inode_account_active(inode); - mutex_unlock(&zi->i_truncate_mutex); + /* + * For a failed IO or partial completion, trigger error recovery + * to update the zone write pointer offset to a correct value. + * For asynchronous IOs, zonefs_file_write_dio_end_io() may already + * have executed error recovery if the IO already completed when we + * reach here. However, we cannot know that and execute error recovery + * again (that will not change anything). + */ + if (zonefs_zone_is_seq(z)) { + if (ret > 0 && ret != count) + ret = -EIO; + if (ret < 0 && ret != -EIOCBQUEUED) + zonefs_io_error(inode, true); } inode_unlock: diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 270ded209dde..f6b701261078 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -245,16 +245,18 @@ static void zonefs_inode_update_mode(struct inode *inode) z->z_flags &= ~ZONEFS_ZONE_INIT_MODE; } -struct zonefs_ioerr_data { - struct inode *inode; - bool write; -}; - static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, void *data) { - struct zonefs_ioerr_data *err = data; - struct inode *inode = err->inode; + struct blk_zone *z = data; + + *z = *zone; + return 0; +} + +static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone, + bool write) +{ struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); @@ -269,8 +271,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, data_size = zonefs_check_zone_condition(sb, z, zone); isize = i_size_read(inode); if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && - !err->write && isize == data_size) - return 0; + !write && isize == data_size) + return; /* * At this point, we detected either a bad zone or an inconsistency @@ -291,7 +293,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, * In all cases, warn about inode size inconsistency and handle the * IO error according to the zone condition and to the mount options. */ - if (zonefs_zone_is_seq(z) && isize != data_size) + if (isize != data_size) zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", inode->i_ino, isize, data_size); @@ -351,8 +353,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, zonefs_i_size_write(inode, data_size); z->z_wpoffset = data_size; zonefs_inode_account_active(inode); - - return 0; } /* @@ -366,23 +366,25 @@ void __zonefs_io_error(struct inode *inode, bool write) { struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; - struct zonefs_sb_info *sbi = ZONEFS_SB(sb); unsigned int noio_flag; - unsigned int nr_zones = 1; - struct zonefs_ioerr_data err = { - .inode = inode, - .write = write, - }; + struct blk_zone zone; int ret; /* - * The only files that have more than one zone are conventional zone - * files with aggregated conventional zones, for which the inode zone - * size is always larger than the device zone size. + * Conventional zone have no write pointer and cannot become read-only + * or offline. So simply fake a report for a single or aggregated zone + * and let zonefs_handle_io_error() correct the zone inode information + * according to the mount options. */ - if (z->z_size > bdev_zone_sectors(sb->s_bdev)) - nr_zones = z->z_size >> - (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + if (!zonefs_zone_is_seq(z)) { + zone.start = z->z_sector; + zone.len = z->z_size >> SECTOR_SHIFT; + zone.wp = zone.start + zone.len; + zone.type = BLK_ZONE_TYPE_CONVENTIONAL; + zone.cond = BLK_ZONE_COND_NOT_WP; + zone.capacity = zone.len; + goto handle_io_error; + } /* * Memory allocations in blkdev_report_zones() can trigger a memory @@ -393,12 +395,20 @@ void __zonefs_io_error(struct inode *inode, bool write) * the GFP_NOIO context avoids both problems. */ noio_flag = memalloc_noio_save(); - ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones, - zonefs_io_error_cb, &err); - if (ret != nr_zones) + ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1, + zonefs_io_error_cb, &zone); + memalloc_noio_restore(noio_flag); + + if (ret != 1) { zonefs_err(sb, "Get inode %lu zone information failed %d\n", inode->i_ino, ret); - memalloc_noio_restore(noio_flag); + zonefs_warn(sb, "remounting filesystem read-only\n"); + sb->s_flags |= SB_RDONLY; + return; + } + +handle_io_error: + zonefs_handle_io_error(inode, &zone, write); } static struct kmem_cache *zonefs_inode_cachep; diff --git a/include/asm-generic/numa.h b/include/asm-generic/numa.h index 1a3ad6d29833..c32e0cf23c90 100644 --- a/include/asm-generic/numa.h +++ b/include/asm-generic/numa.h @@ -35,6 +35,7 @@ int __init numa_add_memblk(int nodeid, u64 start, u64 end); void __init numa_set_distance(int from, int to, int distance); void __init numa_free_distance(void); void __init early_map_cpu_to_node(unsigned int cpu, int nid); +int __init early_cpu_to_node(int cpu); void numa_store_cpu_info(unsigned int cpu); void numa_add_cpu(unsigned int cpu); void numa_remove_cpu(unsigned int cpu); @@ -46,6 +47,7 @@ static inline void numa_add_cpu(unsigned int cpu) { } static inline void numa_remove_cpu(unsigned int cpu) { } static inline void arch_numa_init(void) { } static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { } +static inline int early_cpu_to_node(int cpu) { return 0; } #endif /* CONFIG_NUMA */ diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h index 699650f81970..a84c64e5f11e 100644 --- a/include/asm-generic/unaligned.h +++ b/include/asm-generic/unaligned.h @@ -104,9 +104,9 @@ static inline u32 get_unaligned_le24(const void *p) static inline void __put_unaligned_be24(const u32 val, u8 *p) { - *p++ = val >> 16; - *p++ = val >> 8; - *p++ = val; + *p++ = (val >> 16) & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = val & 0xff; } static inline void put_unaligned_be24(const u32 val, void *p) @@ -116,9 +116,9 @@ static inline void put_unaligned_be24(const u32 val, void *p) static inline void __put_unaligned_le24(const u32 val, u8 *p) { - *p++ = val; - *p++ = val >> 8; - *p++ = val >> 16; + *p++ = val & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = (val >> 16) & 0xff; } static inline void put_unaligned_le24(const u32 val, void *p) @@ -128,12 +128,12 @@ static inline void put_unaligned_le24(const u32 val, void *p) static inline void __put_unaligned_be48(const u64 val, u8 *p) { - *p++ = val >> 40; - *p++ = val >> 32; - *p++ = val >> 24; - *p++ = val >> 16; - *p++ = val >> 8; - *p++ = val; + *p++ = (val >> 40) & 0xff; + *p++ = (val >> 32) & 0xff; + *p++ = (val >> 24) & 0xff; + *p++ = (val >> 16) & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = val & 0xff; } static inline void put_unaligned_be48(const u64 val, void *p) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f82386315f79..b95f8d549e05 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -351,7 +351,6 @@ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ MEM_KEEP(init.data*) \ - MEM_KEEP(exit.data*) \ *(.data.unlikely) \ __start_once = .; \ *(.data.once) \ @@ -546,7 +545,6 @@ __init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \ *(.ref.rodata) \ MEM_KEEP(init.rodata) \ - MEM_KEEP(exit.rodata) \ } \ \ /* Built-in module parameters. */ \ @@ -601,7 +599,6 @@ *(.ref.text) \ *(.text.asan.* .text.tsan.*) \ MEM_KEEP(init.text*) \ - MEM_KEEP(exit.text*) \ /* sched.text is aling to function alignment to secure we have same @@ -751,13 +748,10 @@ *(.exit.data .exit.data.*) \ *(.fini_array .fini_array.*) \ *(.dtors .dtors.*) \ - MEM_DISCARD(exit.data*) \ - MEM_DISCARD(exit.rodata*) #define EXIT_TEXT \ *(.exit.text) \ *(.text.exit) \ - MEM_DISCARD(exit.text) #define EXIT_CALL \ *(.exitcall.exit) diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index 81c298488b0c..6b5eec10c3db 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -24,6 +24,7 @@ #define __DRM_COLOR_MGMT_H__ #include +#include #include struct drm_crtc; diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index 03cb890690e8..a476a406e599 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -70,7 +70,6 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B) } #define DRM_FIXED_POINT 32 -#define DRM_FIXED_POINT_HALF 16 #define DRM_FIXED_ONE (1ULL << DRM_FIXED_POINT) #define DRM_FIXED_DECIMAL_MASK (DRM_FIXED_ONE - 1) #define DRM_FIXED_DIGITS_MASK (~DRM_FIXED_DECIMAL_MASK) @@ -89,12 +88,12 @@ static inline int drm_fixp2int(s64 a) static inline int drm_fixp2int_round(s64 a) { - return drm_fixp2int(a + (1 << (DRM_FIXED_POINT_HALF - 1))); + return drm_fixp2int(a + DRM_FIXED_ONE / 2); } static inline int drm_fixp2int_ceil(s64 a) { - if (a > 0) + if (a >= 0) return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE); else return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE); diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 9054a5185e1a..31171914990a 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -168,6 +168,7 @@ struct mipi_dsi_device_info { * struct mipi_dsi_device - DSI peripheral device * @host: DSI host for this peripheral * @dev: driver model device node for this peripheral + * @attached: the DSI device has been successfully attached * @name: DSI peripheral chip type * @channel: virtual channel assigned to the peripheral * @format: pixel format for video mode @@ -184,6 +185,7 @@ struct mipi_dsi_device_info { struct mipi_dsi_device { struct mipi_dsi_host *host; struct device dev; + bool attached; char name[DSI_DEV_NAME_SIZE]; unsigned int channel; diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h index fafa70ac1337..6f19cf5c210e 100644 --- a/include/drm/drm_modeset_helper_vtables.h +++ b/include/drm/drm_modeset_helper_vtables.h @@ -896,7 +896,8 @@ struct drm_connector_helper_funcs { * * RETURNS: * - * The number of modes added by calling drm_mode_probed_add(). + * The number of modes added by calling drm_mode_probed_add(). Return 0 + * on failures (no modes) instead of negative error codes. */ int (*get_modes)(struct drm_connector *connector); diff --git a/include/dt-bindings/clock/r8a779g0-cpg-mssr.h b/include/dt-bindings/clock/r8a779g0-cpg-mssr.h index 754c54a6eb06..7850cdc62e28 100644 --- a/include/dt-bindings/clock/r8a779g0-cpg-mssr.h +++ b/include/dt-bindings/clock/r8a779g0-cpg-mssr.h @@ -86,5 +86,6 @@ #define R8A779G0_CLK_CPEX 74 #define R8A779G0_CLK_CBFUSA 75 #define R8A779G0_CLK_R 76 +#define R8A779G0_CLK_CP 77 #endif /* __DT_BINDINGS_CLOCK_R8A779G0_CPG_MSSR_H__ */ diff --git a/include/linux/async.h b/include/linux/async.h index cce4ad31e8fc..33c9ff4afb49 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -90,6 +90,8 @@ async_schedule_dev(async_func_t func, struct device *dev) return async_schedule_node(func, dev, dev_to_node(dev)); } +bool async_schedule_dev_nocall(async_func_t func, struct device *dev); + /** * async_schedule_dev_domain - A device specific version of async_schedule_domain * @func: function to execute asynchronously diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c04a61ffac8a..1ca1902af23e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2739,10 +2739,18 @@ struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 +#define MAX_BPRINTF_BUF 1024 + +struct bpf_bprintf_data { + u32 *bin_args; + char *buf; + bool get_bin_args; + bool get_buf; +}; int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u32 **bin_buf, u32 num_args); -void bpf_bprintf_cleanup(void); + u32 num_args, struct bpf_bprintf_data *data); +void bpf_bprintf_cleanup(struct bpf_bprintf_data *data); /* the implementation of the opaque uapi struct bpf_dynptr */ struct bpf_dynptr_kern { diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 9e3dac51eb26..d4dbaae8b521 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -59,7 +59,7 @@ struct bvec_iter { unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ -} __packed; +} __packed __aligned(4); struct bvec_iter_all { struct bio_vec bv; diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 4c3e0648dc27..fcc95bff72a5 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -25,7 +25,11 @@ struct ceph_mdsmap { u32 m_session_timeout; /* seconds */ u32 m_session_autoclose; /* seconds */ u64 m_max_file_size; - u64 m_max_xattr_size; /* maximum size for xattrs blob */ + /* + * maximum size for xattrs blob. + * Zeroed by default to force the usage of the (sync) SETXATTR Op. + */ + u64 m_max_xattr_size; u32 m_max_mds; /* expected up:active mds number */ u32 m_num_active_mds; /* actual up:active mds number */ u32 possible_max_rank; /* possible max rank index */ diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h new file mode 100644 index 000000000000..53f1a7a932b0 --- /dev/null +++ b/include/linux/cleanup.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GUARDS_H +#define __LINUX_GUARDS_H + +#include + +/* + * DEFINE_FREE(name, type, free): + * simple helper macro that defines the required wrapper for a __free() + * based cleanup function. @free is an expression using '_T' to access + * the variable. + * + * __free(name): + * variable attribute to add a scoped based cleanup to the variable. + * + * no_free_ptr(var): + * like a non-atomic xchg(var, NULL), such that the cleanup function will + * be inhibited -- provided it sanely deals with a NULL value. + * + * return_ptr(p): + * returns p while inhibiting the __free(). + * + * Ex. + * + * DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) + * + * struct obj *p __free(kfree) = kmalloc(...); + * if (!p) + * return NULL; + * + * if (!init_obj(p)) + * return NULL; + * + * return_ptr(p); + */ + +#define DEFINE_FREE(_name, _type, _free) \ + static inline void __free_##_name(void *p) { _type _T = *(_type *)p; _free; } + +#define __free(_name) __cleanup(__free_##_name) + +#define no_free_ptr(p) \ + ({ __auto_type __ptr = (p); (p) = NULL; __ptr; }) + +#define return_ptr(p) return no_free_ptr(p) + + +/* + * DEFINE_CLASS(name, type, exit, init, init_args...): + * helper to define the destructor and constructor for a type. + * @exit is an expression using '_T' -- similar to FREE above. + * @init is an expression in @init_args resulting in @type + * + * EXTEND_CLASS(name, ext, init, init_args...): + * extends class @name to @name@ext with the new constructor + * + * CLASS(name, var)(args...): + * declare the variable @var as an instance of the named class + * + * Ex. + * + * DEFINE_CLASS(fdget, struct fd, fdput(_T), fdget(fd), int fd) + * + * CLASS(fdget, f)(fd); + * if (!f.file) + * return -EBADF; + * + * // use 'f' without concern + */ + +#define DEFINE_CLASS(_name, _type, _exit, _init, _init_args...) \ +typedef _type class_##_name##_t; \ +static inline void class_##_name##_destructor(_type *p) \ +{ _type _T = *p; _exit; } \ +static inline _type class_##_name##_constructor(_init_args) \ +{ _type t = _init; return t; } + +#define EXTEND_CLASS(_name, ext, _init, _init_args...) \ +typedef class_##_name##_t class_##_name##ext##_t; \ +static inline void class_##_name##ext##_destructor(class_##_name##_t *p)\ +{ class_##_name##_destructor(p); } \ +static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ +{ class_##_name##_t t = _init; return t; } + +#define CLASS(_name, var) \ + class_##_name##_t var __cleanup(class_##_name##_destructor) = \ + class_##_name##_constructor + + +/* + * DEFINE_GUARD(name, type, lock, unlock): + * trivial wrapper around DEFINE_CLASS() above specifically + * for locks. + * + * guard(name): + * an anonymous instance of the (guard) class + * + * scoped_guard (name, args...) { }: + * similar to CLASS(name, scope)(args), except the variable (with the + * explicit name 'scope') is declard in a for-loop such that its scope is + * bound to the next (compound) statement. + * + */ + +#define DEFINE_GUARD(_name, _type, _lock, _unlock) \ + DEFINE_CLASS(_name, _type, _unlock, ({ _lock; _T; }), _type _T) + +#define guard(_name) \ + CLASS(_name, __UNIQUE_ID(guard)) + +#define scoped_guard(_name, args...) \ + for (CLASS(_name, scope)(args), \ + *done = NULL; !done; done = (void *)1) + +/* + * Additional helper macros for generating lock guards with types, either for + * locks that don't have a native type (eg. RCU, preempt) or those that need a + * 'fat' pointer (eg. spin_lock_irqsave). + * + * DEFINE_LOCK_GUARD_0(name, lock, unlock, ...) + * DEFINE_LOCK_GUARD_1(name, type, lock, unlock, ...) + * + * will result in the following type: + * + * typedef struct { + * type *lock; // 'type := void' for the _0 variant + * __VA_ARGS__; + * } class_##name##_t; + * + * As above, both _lock and _unlock are statements, except this time '_T' will + * be a pointer to the above struct. + */ + +#define __DEFINE_UNLOCK_GUARD(_name, _type, _unlock, ...) \ +typedef struct { \ + _type *lock; \ + __VA_ARGS__; \ +} class_##_name##_t; \ + \ +static inline void class_##_name##_destructor(class_##_name##_t *_T) \ +{ \ + if (_T->lock) { _unlock; } \ +} + + +#define __DEFINE_LOCK_GUARD_1(_name, _type, _lock) \ +static inline class_##_name##_t class_##_name##_constructor(_type *l) \ +{ \ + class_##_name##_t _t = { .lock = l }, *_T = &_t; \ + _lock; \ + return _t; \ +} + +#define __DEFINE_LOCK_GUARD_0(_name, _lock) \ +static inline class_##_name##_t class_##_name##_constructor(void) \ +{ \ + class_##_name##_t _t = { .lock = (void*)1 }, \ + *_T __maybe_unused = &_t; \ + _lock; \ + return _t; \ +} + +#define DEFINE_LOCK_GUARD_1(_name, _type, _lock, _unlock, ...) \ +__DEFINE_UNLOCK_GUARD(_name, _type, _unlock, __VA_ARGS__) \ +__DEFINE_LOCK_GUARD_1(_name, _type, _lock) + +#define DEFINE_LOCK_GUARD_0(_name, _lock, _unlock, ...) \ +__DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ +__DEFINE_LOCK_GUARD_0(_name, _lock) + +#endif /* __LINUX_GUARDS_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 6cfd6902bd5b..9b673fefcef8 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -5,6 +5,15 @@ /* Compiler specific definitions for Clang compiler */ +/* + * Clang prior to 17 is being silly and considers many __cleanup() variables + * as unused (because they are, their sole purpose is to go out of scope). + * + * https://reviews.llvm.org/D152180 + */ +#undef __cleanup +#define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func))) + /* same as gcc, this was present in clang-2.6 so we can assume it works * with any version that can compile the kernel */ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index f55a37efdb97..149a520515e1 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,6 +66,26 @@ __builtin_unreachable(); \ } while (0) +/* + * GCC 'asm goto' with outputs miscompiles certain code sequences: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 + * + * Work around it via the same compiler barrier quirk that we used + * to use for the old 'asm goto' workaround. + * + * Also, always mark such 'asm goto' statements as volatile: all + * asm goto statements are supposed to be volatile as per the + * documentation, but some versions of gcc didn't actually do + * that for asms with outputs: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 + */ +#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND +#define asm_goto_output(x...) \ + do { asm volatile goto(x); asm (""); } while (0) +#endif + #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 898b3458b24a..ae4c9579ca5f 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -75,6 +75,12 @@ # define __assume_aligned(a, ...) #endif +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-cleanup-variable-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#cleanup + */ +#define __cleanup(func) __attribute__((__cleanup__(func))) + /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index eb0466236661..574b4121ebe3 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -284,8 +284,15 @@ struct ftrace_likely_data { # define __realloc_size(x, ...) #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +/* + * Some versions of gcc do not mark 'asm goto' volatile: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 + * + * We do it here by hand, because it doesn't hurt. + */ +#ifndef asm_goto_output +#define asm_goto_output(x...) asm volatile goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 12e929062f5a..ea89c9777928 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -74,6 +74,8 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d5595d57f4e5..9d208648c84d 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1023,6 +1023,18 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +{ + unsigned int freq; + + if (idx < 0) + return false; + + freq = policy->freq_table[idx].frequency; + + return freq == clamp_val(freq, policy->min, policy->max); +} + static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -1056,7 +1068,8 @@ retry: return 0; } - if (idx < 0 && efficiencies) { + /* Limit frequency index to honor policy->min/max */ + if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { efficiencies = false; goto retry; } diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h index 9192986b1a73..ac862422df15 100644 --- a/include/linux/decompress/mm.h +++ b/include/linux/decompress/mm.h @@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size) if (!malloc_ptr) malloc_ptr = free_mem_ptr; - malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ + malloc_ptr = (malloc_ptr + 7) & ~7; /* Align */ p = (void *)malloc_ptr; malloc_ptr += size; diff --git a/include/linux/device.h b/include/linux/device.h index 7cf24330d681..5520bb546a4a 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -30,6 +30,7 @@ #include #include #include +#include #include struct device; @@ -898,6 +899,9 @@ void device_unregister(struct device *dev); void device_initialize(struct device *dev); int __must_check device_add(struct device *dev); void device_del(struct device *dev); + +DEFINE_FREE(device_del, struct device *, if (_T) device_del(_T)) + int device_for_each_child(struct device *dev, void *data, int (*fn)(struct device *dev, void *data)); int device_for_each_child_reverse(struct device *dev, void *data, @@ -1071,6 +1075,9 @@ extern int (*platform_notify_remove)(struct device *dev); */ struct device *get_device(struct device *dev); void put_device(struct device *dev); + +DEFINE_FREE(put_device, struct device *, if (_T) put_device(_T)) + bool kill_device(struct device *dev); #ifdef CONFIG_DEVTMPFS diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 92e7abfe04f9..70b3737052dd 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -79,7 +79,8 @@ void dm_io_client_destroy(struct dm_io_client *client); * error occurred doing io to the corresponding region. */ int dm_io(struct dm_io_request *io_req, unsigned int num_regions, - struct dm_io_region *region, unsigned int long *sync_error_bits); + struct dm_io_region *region, unsigned int long *sync_error_bits, + unsigned short ioprio); #endif /* __KERNEL__ */ #endif /* _LINUX_DM_IO_H */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index cfb60843d108..66fc5097bca4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -958,7 +958,8 @@ static inline int dmaengine_slave_config(struct dma_chan *chan, static inline bool is_slave_direction(enum dma_transfer_direction direction) { - return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM); + return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM) || + (direction == DMA_DEV_TO_DEV); } static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single( diff --git a/include/linux/efi.h b/include/linux/efi.h index 4e1bfee9675d..de6d6558a4d3 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -390,6 +390,7 @@ void efi_native_runtime_setup(void); #define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9) #define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9) #define EFI_SMBIOS_PROTOCOL_GUID EFI_GUID(0x03583ff6, 0xcb36, 0x4940, 0x94, 0x7e, 0xb9, 0xb3, 0x9f, 0x4a, 0xfa, 0xf7) +#define EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID EFI_GUID(0xf4560cf6, 0x40ec, 0x4b4a, 0xa1, 0x92, 0xbf, 0x1d, 0x57, 0xd0, 0xb1, 0x89) #define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f) #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index fe848901fcc3..218fc5c54e90 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -221,6 +221,7 @@ struct export_operations { #define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply atomic attribute updates */ +#define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ unsigned long flags; }; diff --git a/include/linux/file.h b/include/linux/file.h index 39704eae83e2..6e9099d29343 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -10,6 +10,7 @@ #include #include #include +#include struct file; @@ -80,6 +81,8 @@ static inline void fdput_pos(struct fd f) fdput(f); } +DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd) + extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); @@ -88,6 +91,9 @@ extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile); extern int get_unused_fd_flags(unsigned flags); extern void put_unused_fd(unsigned int fd); +DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), + get_unused_fd_flags(flags), unsigned flags) + extern void fd_install(unsigned int fd, struct file *file); extern int __receive_fd(struct file *file, int __user *ufd, diff --git a/include/linux/filter.h b/include/linux/filter.h index efc42a6e3aed..face590b24e1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -495,24 +495,27 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ u64, __ur_3, u64, __ur_4, u64, __ur_5) -#define BPF_CALL_x(x, name, ...) \ +#define BPF_CALL_x(x, attr, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) -#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) -#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) -#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) -#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) -#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) -#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) +#define __NOATTR +#define BPF_CALL_0(name, ...) BPF_CALL_x(0, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_1(name, ...) BPF_CALL_x(1, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_2(name, ...) BPF_CALL_x(2, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_3(name, ...) BPF_CALL_x(3, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_4(name, ...) BPF_CALL_x(4, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_5(name, ...) BPF_CALL_x(5, __NOATTR, name, __VA_ARGS__) + +#define NOTRACE_BPF_CALL_1(name, ...) BPF_CALL_x(1, notrace, name, __VA_ARGS__) #define bpf_ctx_range(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a1911dcf834..092d8fa10153 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -337,6 +337,8 @@ enum rw_hint { #define IOCB_NOIO (1 << 20) /* can use bio alloc cache */ #define IOCB_ALLOC_CACHE (1 << 21) +/* kiocb is a read or write operation submitted by fs/aio.c. */ +#define IOCB_AIO_RW (1 << 23) struct kiocb { struct file *ki_filp; @@ -1187,6 +1189,13 @@ extern void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files); extern bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner); + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return smp_load_acquire(&inode->i_flctx); +} + #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1332,6 +1341,13 @@ static inline bool locks_owner_has_blockers(struct file_lock_context *flctx, { return false; } + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return NULL; +} + #endif /* !CONFIG_FILE_LOCKING */ static inline struct inode *file_inode(const struct file *f) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 65a78773dcca..e2ccb47c4264 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -341,6 +341,15 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask); extern void pm_restrict_gfp_mask(void); extern void pm_restore_gfp_mask(void); +/* + * Check if the gfp flags allow compaction - GFP_NOIO is a really + * tricky context because the migration might require IO. + */ +static inline bool gfp_compaction_allowed(gfp_t gfp_mask) +{ + return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO); +} + extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma); #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index f2044d5a652b..254d4a898179 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -197,6 +197,7 @@ enum hrtimer_base_type { * @max_hang_time: Maximum time spent in hrtimer_interrupt * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are * expired + * @online: CPU is online from an hrtimers point of view * @timer_waiters: A hrtimer_cancel() invocation waits for the timer * callback to finish. * @expires_next: absolute time of the next event, is required for remote @@ -219,7 +220,8 @@ struct hrtimer_cpu_base { unsigned int hres_active : 1, in_hrtirq : 1, hang_detected : 1, - softirq_activated : 1; + softirq_activated : 1, + online : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 646f1da9f27e..4fbd5d841711 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -164,8 +164,28 @@ struct hv_ring_buffer { u8 buffer[]; } __packed; + +/* + * If the requested ring buffer size is at least 8 times the size of the + * header, steal space from the ring buffer for the header. Otherwise, add + * space for the header so that is doesn't take too much of the ring buffer + * space. + * + * The factor of 8 is somewhat arbitrary. The goal is to prevent adding a + * relatively small header (4 Kbytes on x86) to a large-ish power-of-2 ring + * buffer size (such as 128 Kbytes) and so end up making a nearly twice as + * large allocation that will be almost half wasted. As a contrasting example, + * on ARM64 with 64 Kbyte page size, we don't want to take 64 Kbytes for the + * header from a 128 Kbyte allocation, leaving only 64 Kbytes for the ring. + * In this latter case, we must add 64 Kbytes for the header and not worry + * about what's wasted. + */ +#define VMBUS_HEADER_ADJ(payload_sz) \ + ((payload_sz) >= 8 * sizeof(struct hv_ring_buffer) ? \ + 0 : sizeof(struct hv_ring_buffer)) + /* Calculate the proper size of a ringbuffer, it must be page-aligned */ -#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(sizeof(struct hv_ring_buffer) + \ +#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(VMBUS_HEADER_ADJ(payload_sz) + \ (payload_sz)) struct hv_ring_buffer_info { diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 7852f6c9a714..719cf9cc6e1a 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -8,6 +8,8 @@ #ifndef __AD_SIGMA_DELTA_H__ #define __AD_SIGMA_DELTA_H__ +#include + enum ad_sigma_delta_mode { AD_SD_MODE_CONTINUOUS = 0, AD_SD_MODE_SINGLE = 1, @@ -99,7 +101,7 @@ struct ad_sigma_delta { * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, * rounded to 16 bytes to take into account padding. */ - uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN); uint8_t rx_buf[16] __aligned(8); }; diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index db4a1b260348..c34e648f07e2 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -261,9 +261,9 @@ struct st_sensor_data { bool hw_irq_trigger; s64 hw_timestamp; - char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; - struct mutex odr_lock; + + char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN); }; #ifdef CONFIG_IIO_BUFFER diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index bcbefb757475..af083aa0c431 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -11,6 +11,7 @@ #include #include +#include #include #define ADIS_WRITE_REG(reg) ((0x80 | (reg))) @@ -131,7 +132,7 @@ struct adis { unsigned long irq_flag; void *buffer; - u8 tx[10] ____cacheline_aligned; + u8 tx[10] __aligned(IIO_DMA_MINALIGN); u8 rx[4]; }; diff --git a/include/linux/init.h b/include/linux/init.h index 077d7f93b402..c96aea3229ca 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -87,9 +87,6 @@ __latent_entropy #define __meminitdata __section(".meminit.data") #define __meminitconst __section(".meminit.rodata") -#define __memexit __section(".memexit.text") __exitused __cold notrace -#define __memexitdata __section(".memexit.data") -#define __memexitconst __section(".memexit.rodata") /* For assembly routines */ #define __HEAD .section ".head.text","ax" diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index a1484cdb3158..a8f3058448ea 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -42,11 +42,11 @@ void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, unsigned issue_flags); void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, void (*task_work_cb)(struct io_uring_cmd *, unsigned)); -struct sock *io_uring_get_socket(struct file *file); void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); void io_uring_unreg_ringfd(void); const char *io_uring_get_opcode(u8 opcode); +bool io_is_uring_fops(struct file *file); static inline void io_uring_files_cancel(void) { @@ -71,6 +71,10 @@ static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, { return -EOPNOTSUPP; } +static inline bool io_is_uring_fops(struct file *file) +{ + return false; +} static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t ret2, unsigned issue_flags) { @@ -79,10 +83,6 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, void (*task_work_cb)(struct io_uring_cmd *, unsigned)) { } -static inline struct sock *io_uring_get_socket(struct file *file) -{ - return NULL; -} static inline void io_uring_task_cancel(void) { } diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index f5b687a787a3..37aeea266ebb 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -330,9 +330,6 @@ struct io_ring_ctx { struct list_head io_buffers_pages; - #if defined(CONFIG_UNIX) - struct socket *ring_sock; - #endif /* hashed buffered write serialization */ struct io_wq_hash *hash_map; diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 8cd11a223260..136f2980cba3 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -66,6 +66,9 @@ void irq_work_sync(struct irq_work *work); void irq_work_run(void); bool irq_work_needs_cpu(void); void irq_work_single(void *arg); + +void arch_irq_work_raise(void); + #else static inline bool irq_work_needs_cpu(void) { return false; } static inline void irq_work_run(void) { } diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5ec0fa71399e..2b665c32f5fe 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -13,6 +13,7 @@ #define _LINUX_TRACE_IRQFLAGS_H #include +#include #include #include @@ -267,4 +268,10 @@ extern void warn_bogus_irq_restore(void); #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) +DEFINE_LOCK_GUARD_0(irq, local_irq_disable(), local_irq_enable()) +DEFINE_LOCK_GUARD_0(irqsave, + local_irq_save(_T->flags), + local_irq_restore(_T->flags), + unsigned long flags) + #endif diff --git a/include/linux/libata.h b/include/linux/libata.h index 45910aebc377..6645259be143 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -102,6 +102,7 @@ enum { ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */ ATA_DFLAG_NCQ_PRIO = (1 << 20), /* device supports NCQ priority */ ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 21), /* Priority cmds sent to dev */ + ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */ ATA_DFLAG_INIT_MASK = (1 << 24) - 1, ATA_DFLAG_DETACH = (1 << 24), diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 02b19c508b78..6239a378c0ea 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -165,6 +165,8 @@ LSM_HOOK(int, 0, file_alloc_security, struct file *file) LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file) LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd, unsigned long arg) +LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd, + unsigned long arg) LSM_HOOK(int, 0, mmap_addr, unsigned long addr) LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index b0da04fe087b..34dfcc77f505 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -126,10 +126,11 @@ struct cmos_rtc_board_info { #endif /* ARCH_RTC_LOCATION */ bool mc146818_does_rtc_work(void); -int mc146818_get_time(struct rtc_time *time); +int mc146818_get_time(struct rtc_time *time, int timeout); int mc146818_set_time(struct rtc_time *time); bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), + int timeout, void *param); #endif /* _MC146818RTC_H */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 758a063f9b8f..31f9bc12c3d7 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -122,6 +122,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif void memblock_trim_memory(phys_addr_t align); +unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 5433c08fcc68..1aea34b8f19b 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -51,6 +51,23 @@ */ #define max(x, y) __careful_cmp(x, y, >) +/** + * umin - return minimum of two non-negative values + * Signed types are zero extended to match a larger unsigned type. + * @x: first value + * @y: second value + */ +#define umin(x, y) \ + __careful_cmp((x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull, <) + +/** + * umax - return maximum of two non-negative values + * @x: first value + * @y: second value + */ +#define umax(x, y) \ + __careful_cmp((x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull, >) + /** * min3 - return minimum of three values * @x: first value diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 4657d5c54abe..ca0eee571ad7 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -269,7 +269,10 @@ struct mlx5_wqe_eth_seg { union { struct { __be16 sz; - u8 start[2]; + union { + u8 start[2]; + DECLARE_FLEX_ARRAY(u8, data); + }; } inline_hdr; struct { __be16 type; diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 74f9d9a6d330..0e4ef9c5127a 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -102,6 +102,7 @@ #define SDIO_DEVICE_ID_MARVELL_8977_BT 0x9146 #define SDIO_DEVICE_ID_MARVELL_8987_WLAN 0x9149 #define SDIO_DEVICE_ID_MARVELL_8987_BT 0x914a +#define SDIO_DEVICE_ID_MARVELL_8978_WLAN 0x9159 #define SDIO_VENDOR_ID_MEDIATEK 0x037a #define SDIO_DEVICE_ID_MEDIATEK_MT7663 0x7663 diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 323ee36df683..3b9f4d7c40c3 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1622,6 +1622,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK) struct mem_section_usage { + struct rcu_head rcu; #ifdef CONFIG_SPARSEMEM_VMEMMAP DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); #endif @@ -1815,7 +1816,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); - return test_bit(idx, ms->usage->subsection_map); + return test_bit(idx, READ_ONCE(ms->usage)->subsection_map); } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) @@ -1839,6 +1840,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) static inline int pfn_valid(unsigned long pfn) { struct mem_section *ms; + int ret; /* * Ensure the upper PAGE_SHIFT bits are clear in the @@ -1852,13 +1854,19 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); - if (!valid_section(ms)) + rcu_read_lock_sched(); + if (!valid_section(ms)) { + rcu_read_unlock_sched(); return 0; + } /* * Traditionally early sections always returned pfn_valid() for * the entire section-sized span. */ - return early_section(ms) || pfn_section_valid(ms, pfn); + ret = early_section(ms) || pfn_section_valid(ms, pfn); + rcu_read_unlock_sched(); + + return ret; } #endif diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 1322652a9d0d..7dc186ec52a2 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -95,6 +95,14 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); +#ifdef CONFIG_MODULES +void flush_module_init_free_work(void); +#else +static inline void flush_module_init_free_work(void) +{ +} +#endif + /* Any cleanup needed when module leaves. */ void module_arch_cleanup(struct module *mod); diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 8f226d460f51..a33aa9eb9fc3 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ @@ -219,4 +220,7 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); +DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) +DEFINE_FREE(mutex, struct mutex *, if (_T) mutex_unlock(_T)) + #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index bef8db9d6c08..e5f4b6f8d1c0 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -437,11 +437,13 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #include void nf_ct_attach(struct sk_buff *, const struct sk_buff *); +void nf_ct_set_closing(struct nf_conntrack *nfct); struct nf_conntrack_tuple; bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb); #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} +static inline void nf_ct_set_closing(struct nf_conntrack *nfct) {} struct nf_conntrack_tuple; static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) @@ -459,6 +461,8 @@ struct nf_ct_hook { bool (*get_tuple_skb)(struct nf_conntrack_tuple *, const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); + void (*set_closing)(struct nf_conntrack *nfct); + int (*confirm)(struct sk_buff *skb); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 72f5ebc5c97a..0b217d4ae2a4 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -186,6 +186,8 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Cancel ongoing garbage collectors before destroying the set*/ + void (*cancel_gc)(struct ip_set *set); /* Region-locking is used */ bool region_lock; }; @@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type); /* A generic IP set */ struct ip_set { + /* For call_cru in destroy */ + struct rcu_head rcu; /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 8d04b6a5964c..730003c4f4af 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -732,4 +732,17 @@ enum nfs4_setxattr_options { SETXATTR4_CREATE = 1, SETXATTR4_REPLACE = 2, }; + +enum { + RCA4_TYPE_MASK_RDATA_DLG = 0, + RCA4_TYPE_MASK_WDATA_DLG = 1, + RCA4_TYPE_MASK_DIR_DLG = 2, + RCA4_TYPE_MASK_FILE_LAYOUT = 3, + RCA4_TYPE_MASK_BLK_LAYOUT = 4, + RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8, + RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9, + RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12, + RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15, +}; + #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7931fa472561..ac7d799d9d38 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -582,6 +582,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); +void nfs_commit_begin(struct nfs_mds_commit_info *cinfo); bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); static inline bool nfs_have_writebacks(const struct inode *inode) diff --git a/include/linux/pci.h b/include/linux/pci.h index bf0ac07394e0..0c80bdb145c5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2358,6 +2358,11 @@ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) return NULL; } +static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) +{ + return dev->error_state == pci_channel_io_perm_failure; +} + void pci_request_acs(void); bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags); bool pci_acs_path_enabled(struct pci_dev *start, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d235f2a9c65a..70b1d8dde991 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3005,6 +3005,7 @@ #define PCI_DEVICE_ID_INTEL_82443GX_0 0x71a0 #define PCI_DEVICE_ID_INTEL_82443GX_2 0x71a2 #define PCI_DEVICE_ID_INTEL_82372FB_1 0x7601 +#define PCI_DEVICE_ID_INTEL_HDA_ARL 0x7728 #define PCI_DEVICE_ID_INTEL_SCH_LPC 0x8119 #define PCI_DEVICE_ID_INTEL_SCH_IDE 0x811a #define PCI_DEVICE_ID_INTEL_E6XX_CU 0x8183 diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f1ec5ad1351c..ba00a49369ca 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -128,6 +129,9 @@ extern void __init setup_per_cpu_areas(void); extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1); extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1); extern void free_percpu(void __percpu *__pdata); + +DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T)) + extern phys_addr_t per_cpu_ptr_to_phys(void *addr); #define alloc_percpu_gfp(type, gfp) \ diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h index 70998e6dd6fd..6ca51e0080ec 100644 --- a/include/linux/phy/tegra/xusb.h +++ b/include/linux/phy/tegra/xusb.h @@ -26,6 +26,7 @@ void tegra_phy_xusb_utmi_pad_power_down(struct phy *phy); int tegra_phy_xusb_utmi_port_reset(struct phy *phy); int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, unsigned int port); +int tegra_xusb_padctl_get_port_number(struct phy *phy); int tegra_xusb_padctl_enable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy, enum usb_device_speed speed); int tegra_xusb_padctl_disable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 28b3c6a67397..1f1e7ae95320 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -124,6 +124,22 @@ struct pipe_buf_operations { bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; +/** + * pipe_has_watch_queue - Check whether the pipe is a watch_queue, + * i.e. it was created with O_NOTIFICATION_PIPE + * @pipe: The pipe to check + * + * Return: true if pipe is a watch queue, false otherwise. + */ +static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) +{ +#ifdef CONFIG_WATCH_QUEUE + return pipe->watch_queue != NULL; +#else + return false; +#endif +} + /** * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer diff --git a/include/linux/poll.h b/include/linux/poll.h index a9e0e1c2d1f2..d1ea4f3714a8 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -14,11 +14,7 @@ /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating additional memory. */ -#ifdef __clang__ -#define MAX_STACK_ALLOC 768 -#else #define MAX_STACK_ALLOC 832 -#endif #define FRONTEND_STACK_ALLOC 256 #define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC #define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 8cfcc5d45451..9aa6358a1a16 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -8,6 +8,7 @@ */ #include +#include #include /* @@ -474,4 +475,8 @@ static __always_inline void preempt_enable_nested(void) preempt_enable(); } +DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable()) +DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace()) +DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) + #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 46bd9a331fd5..319698087d66 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -267,6 +268,37 @@ do { \ cond_resched(); \ } while (0) +/** + * rcu_softirq_qs_periodic - Report RCU and RCU-Tasks quiescent states + * @old_ts: jiffies at start of processing. + * + * This helper is for long-running softirq handlers, such as NAPI threads in + * networking. The caller should initialize the variable passed in as @old_ts + * at the beginning of the softirq handler. When invoked frequently, this macro + * will invoke rcu_softirq_qs() every 100 milliseconds thereafter, which will + * provide both RCU and RCU-Tasks quiescent states. Note that this macro + * modifies its old_ts argument. + * + * Because regions of code that have disabled softirq act as RCU read-side + * critical sections, this macro should be invoked with softirq (and + * preemption) enabled. + * + * The macro is not needed when CONFIG_PREEMPT_RT is defined. RT kernels would + * have more chance to invoke schedule() calls and provide necessary quiescent + * states. As a contrast, calling cond_resched() only won't achieve the same + * effect because cond_resched() does not provide RCU-Tasks quiescent states. + */ +#define rcu_softirq_qs_periodic(old_ts) \ +do { \ + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && \ + time_after(jiffies, (old_ts) + HZ / 10)) { \ + preempt_disable(); \ + rcu_softirq_qs(); \ + preempt_enable(); \ + (old_ts) = jiffies; \ + } \ +} while (0) + /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. @@ -1077,4 +1109,6 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) extern int rcu_expedited; extern int rcu_normal; +DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock()) + #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 3c7d295746f6..3e7bfc0f65ae 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -98,6 +98,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) +typedef bool (*ring_buffer_cond_fn)(void *data); int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 2bdba700bc3e..7d1ecd9b4373 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -250,8 +250,8 @@ static inline int page_try_dup_anon_rmap(struct page *page, bool compound, * guarantee the pinned page won't be randomly replaced in the * future on write faults. */ - if (likely(!is_device_private_page(page) && - unlikely(page_needs_cow_for_dma(vma, page)))) + if (likely(!is_device_private_page(page)) && + unlikely(page_needs_cow_for_dma(vma, page))) return -EBUSY; ClearPageAnonExclusive(page); diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index efa5c324369a..1dd530ce8b45 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __RWSEM_DEP_MAP_INIT(lockname) \ @@ -201,6 +202,13 @@ extern void up_read(struct rw_semaphore *sem); */ extern void up_write(struct rw_semaphore *sem); +DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) +DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) + +DEFINE_FREE(up_read, struct rw_semaphore *, if (_T) up_read(_T)) +DEFINE_FREE(up_write, struct rw_semaphore *, if (_T) up_write(_T)) + + /* * downgrade write lock to read lock */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 7291fb6399d2..aaa25ed1a8fe 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -145,6 +145,8 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } +DEFINE_FREE(put_task, struct task_struct *, if (_T) put_task_struct(_T)) + static inline void put_task_struct_many(struct task_struct *t, int nr) { if (refcount_sub_and_test(nr, &t->usage)) diff --git a/include/linux/security.h b/include/linux/security.h index a6c97cc57caa..2772f6375f14 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -385,6 +385,8 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int security_file_ioctl_compat(struct file *file, unsigned int cmd, + unsigned long arg); int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags); int security_mmap_addr(unsigned long addr); @@ -969,6 +971,13 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } +static inline int security_file_ioctl_compat(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + return 0; +} + static inline int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) { diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index d657f2a42a7b..13bf20242b61 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -583,6 +583,85 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/** + * uart_port_lock - Lock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock(struct uart_port *up) +{ + spin_lock(&up->lock); +} + +/** + * uart_port_lock_irq - Lock the UART port and disable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock_irq(struct uart_port *up) +{ + spin_lock_irq(&up->lock); +} + +/** + * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + */ +static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_trylock - Try to lock the UART port + * @up: Pointer to UART port structure + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock(struct uart_port *up) +{ + return spin_trylock(&up->lock); +} + +/** + * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) +{ + return spin_trylock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_unlock - Unlock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock(struct uart_port *up) +{ + spin_unlock(&up->lock); +} + +/** + * uart_port_unlock_irq - Unlock the UART port and re-enable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock_irq(struct uart_port *up) +{ + spin_unlock_irq(&up->lock); +} + +/** + * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts + * @up: Pointer to UART port structure + * @flags: The saved interrupt flags for restore + */ +static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c953b8c0d2f4..bd4418377bac 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -500,12 +500,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) return !!psock->saved_data_ready; } -static inline bool sk_is_udp(const struct sock *sk) -{ - return sk->sk_type == SOCK_DGRAM && - sk->sk_protocol == IPPROTO_UDP; -} - #if IS_ENABLED(CONFIG_NET_SOCK_MSG) #define BPF_F_STRPARSER (1UL << 1) diff --git a/include/linux/slab.h b/include/linux/slab.h index 92bf70c8ad89..79d73ad0cf0e 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -17,6 +17,7 @@ #include #include #include +#include /* @@ -197,6 +198,8 @@ void kfree(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); +DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) + /** * ksize - Report actual allocation size of associated object * diff --git a/include/linux/socket.h b/include/linux/socket.h index b3c58042bd25..d79efd026880 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -33,7 +33,10 @@ typedef __kernel_sa_family_t sa_family_t; struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ - char sa_data[14]; /* 14 bytes of protocol address */ + union { + char sa_data_min[14]; /* Minimum 14 bytes of protocol address */ + DECLARE_FLEX_ARRAY(char, sa_data); + }; }; struct linger { diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 1341f7d62da4..83377540c369 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -493,5 +494,35 @@ int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, void free_bucket_spinlocks(spinlock_t *locks); +DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t, + raw_spin_lock(_T->lock), + raw_spin_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t, + raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING), + raw_spin_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t, + raw_spin_lock_irq(_T->lock), + raw_spin_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, + raw_spin_lock_irqsave(_T->lock, _T->flags), + raw_spin_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + +DEFINE_LOCK_GUARD_1(spinlock, spinlock_t, + spin_lock(_T->lock), + spin_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, + spin_lock_irq(_T->lock), + spin_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, + spin_lock_irqsave(_T->lock, _T->flags), + spin_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + #undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 01226e4d960a..f9e1fa7ff86f 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -212,4 +212,9 @@ static inline void smp_mb__after_srcu_read_unlock(void) /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */ } +DEFINE_LOCK_GUARD_1(srcu, struct srcu_struct, + _T->idx = srcu_read_lock(_T->lock), + srcu_read_unlock(_T->lock, _T->idx), + int idx) + #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index a18cf4b7c724..add47f43e568 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -571,6 +571,11 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } +static inline int swapcache_prepare(swp_entry_t swp) +{ + return 0; +} + static inline void swap_free(swp_entry_t swp) { } diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a34b0f9a9972..27a6ba1c0ec4 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -123,6 +123,7 @@ enum landlock_rule_type; #define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a #define __SC_CAST(t, a) (__force t) a +#define __SC_TYPE(t, a) t #define __SC_ARGS(t, a) a #define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long)) diff --git a/include/linux/timer.h b/include/linux/timer.h index 648f00105f58..6d18f04ad703 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -183,12 +183,20 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); +extern int timer_delete_sync(struct timer_list *timer); -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) - extern int del_timer_sync(struct timer_list *timer); -#else -# define del_timer_sync(t) del_timer(t) -#endif +/** + * del_timer_sync - Delete a pending timer and wait for a running callback + * @timer: The timer to be deleted + * + * See timer_delete_sync() for detailed explanation. + * + * Do not use in new code. Use timer_delete_sync() instead. + */ +static inline int del_timer_sync(struct timer_list *timer) +{ + return timer_delete_sync(timer); +} #define del_singleshot_timer_sync(t) del_timer_sync(t) diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 814f65799200..1c46a0104224 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -412,6 +412,8 @@ extern int composite_dev_prepare(struct usb_composite_driver *composite, extern int composite_os_desc_req_prepare(struct usb_composite_dev *cdev, struct usb_ep *ep0); void composite_dev_cleanup(struct usb_composite_dev *cdev); +void check_remote_wakeup_config(struct usb_gadget *g, + struct usb_configuration *c); static inline struct usb_composite_driver *to_cdriver( struct usb_gadget_driver *gdrv) diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index dc3092cea99e..5bec668b41dc 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -309,6 +309,7 @@ struct usb_udc; struct usb_gadget_ops { int (*get_frame)(struct usb_gadget *); int (*wakeup)(struct usb_gadget *); + int (*set_remote_wakeup)(struct usb_gadget *, int set); int (*set_selfpowered) (struct usb_gadget *, int is_selfpowered); int (*vbus_session) (struct usb_gadget *, int is_active); int (*vbus_draw) (struct usb_gadget *, unsigned mA); @@ -383,6 +384,8 @@ struct usb_gadget_ops { * @connected: True if gadget is connected. * @lpm_capable: If the gadget max_speed is FULL or HIGH, this flag * indicates that it supports LPM as per the LPM ECN & errata. + * @wakeup_capable: True if gadget is capable of sending remote wakeup. + * @wakeup_armed: True if gadget is armed by the host for remote wakeup. * @irq: the interrupt number for device controller. * @id_number: a unique ID number for ensuring that gadget names are distinct * @@ -444,6 +447,8 @@ struct usb_gadget { unsigned deactivated:1; unsigned connected:1; unsigned lpm_capable:1; + unsigned wakeup_capable:1; + unsigned wakeup_armed:1; int irq; int id_number; }; @@ -600,6 +605,7 @@ static inline int gadget_is_otg(struct usb_gadget *g) #if IS_ENABLED(CONFIG_USB_GADGET) int usb_gadget_frame_number(struct usb_gadget *gadget); int usb_gadget_wakeup(struct usb_gadget *gadget); +int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set); int usb_gadget_set_selfpowered(struct usb_gadget *gadget); int usb_gadget_clear_selfpowered(struct usb_gadget *gadget); int usb_gadget_vbus_connect(struct usb_gadget *gadget); @@ -615,6 +621,8 @@ static inline int usb_gadget_frame_number(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_wakeup(struct usb_gadget *gadget) { return 0; } +static inline int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) +{ return 0; } static inline int usb_gadget_set_selfpowered(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_clear_selfpowered(struct usb_gadget *gadget) diff --git a/include/linux/vfio.h b/include/linux/vfio.h index fdd393f70b19..5e7bf143cb22 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -268,6 +268,7 @@ struct virqfd { wait_queue_entry_t wait; poll_table pt; struct work_struct shutdown; + struct work_struct flush_inject; struct virqfd **pvirqfd; }; @@ -275,5 +276,6 @@ int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), void (*thread)(void *, void *), void *data, struct virqfd **pvirqfd, int fd); void vfio_virqfd_disable(struct virqfd **pvirqfd); +void vfio_virqfd_flush_thread(struct virqfd **pvirqfd); #endif /* VFIO_H */ diff --git a/include/media/media-entity.h b/include/media/media-entity.h index 28c9de8a1f34..03bb0963942b 100644 --- a/include/media/media-entity.h +++ b/include/media/media-entity.h @@ -205,6 +205,7 @@ enum media_pad_signal_type { * @graph_obj: Embedded structure containing the media object common data * @entity: Entity this pad belongs to * @index: Pad index in the entity pads array, numbered from 0 to n + * @num_links: Number of links connected to this pad * @sig_type: Type of the signal inside a media pad * @flags: Pad flags, as defined in * :ref:`include/uapi/linux/media.h ` @@ -216,6 +217,7 @@ struct media_pad { struct media_gobj graph_obj; /* must be first field in struct */ struct media_entity *entity; u16 index; + u16 num_links; enum media_pad_signal_type sig_type; unsigned long flags; diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 55ca217c626b..0920b669b9b3 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -45,12 +45,6 @@ struct scm_stat { #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) -#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) -#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) -#define unix_state_lock_nested(s) \ - spin_lock_nested(&unix_sk(s)->lock, \ - SINGLE_DEPTH_NESTING) - /* The AF_UNIX socket */ struct unix_sock { /* WARNING: sk has to be the first member */ @@ -79,6 +73,20 @@ static inline struct unix_sock *unix_sk(const struct sock *sk) } #define unix_peer(sk) (unix_sk(sk)->peer) +#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) +enum unix_socket_lock_class { + U_LOCK_NORMAL, + U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ + U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ +}; + +static inline void unix_state_lock_nested(struct sock *sk, + enum unix_socket_lock_class subclass) +{ + spin_lock_nested(&unix_sk(sk)->lock, subclass); +} + #define peer_wait peer_wq.wait long unix_inq_len(struct sock *sk); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index a674221d151d..c69e09909449 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -416,7 +416,6 @@ enum { #define HCI_NCMD_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ #define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */ #define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ -#define HCI_POWER_OFF_TIMEOUT msecs_to_jiffies(5000) /* 5 seconds */ #define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ #define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 09c978f3d95d..c50a41f1782a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -81,7 +81,7 @@ struct discovery_state { u8 last_adv_addr_type; s8 last_adv_rssi; u32 last_adv_flags; - u8 last_adv_data[HCI_MAX_AD_LENGTH]; + u8 last_adv_data[HCI_MAX_EXT_AD_LENGTH]; u8 last_adv_data_len; bool report_invalid_rssi; bool result_filtering; @@ -293,7 +293,7 @@ struct adv_pattern { __u8 ad_type; __u8 offset; __u8 length; - __u8 value[HCI_MAX_AD_LENGTH]; + __u8 value[HCI_MAX_EXT_AD_LENGTH]; }; struct adv_rssi_thresholds { @@ -549,6 +549,7 @@ struct hci_dev { __u32 req_status; __u32 req_result; struct sk_buff *req_skb; + struct sk_buff *req_rsp; void *smp_data; void *smp_bredr_data; @@ -726,7 +727,7 @@ struct hci_conn { __u16 le_conn_interval; __u16 le_conn_latency; __u16 le_supv_timeout; - __u8 le_adv_data[HCI_MAX_AD_LENGTH]; + __u8 le_adv_data[HCI_MAX_EXT_AD_LENGTH]; __u8 le_adv_data_len; __u8 le_per_adv_data[HCI_MAX_PER_AD_LENGTH]; __u8 le_per_adv_data_len; @@ -739,6 +740,7 @@ struct hci_conn { unsigned long flags; enum conn_reasons conn_reason; + __u8 abort_reason; __u32 clock; __u16 clock_accuracy; @@ -758,7 +760,6 @@ struct hci_conn { struct delayed_work auto_accept_work; struct delayed_work idle_work; struct delayed_work le_conn_timeout; - struct work_struct le_scan_cleanup; struct device dev; struct dentry *debugfs; @@ -1709,6 +1710,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn); /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) +/* Maximum advertising length */ +#define max_adv_len(dev) \ + (ext_adv_capable(dev) ? HCI_MAX_EXT_AD_LENGTH : HCI_MAX_AD_LENGTH) + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 1789: * * C24: Mandatory if the LE Controller supports Connection State and either diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 17f5a4c32f36..59d15b1a978a 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -39,8 +39,10 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_cmd_sync_init(struct hci_dev *hdev); void hci_cmd_sync_clear(struct hci_dev *hdev); void hci_cmd_sync_cancel(struct hci_dev *hdev, int err); -void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err); +void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err); +int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index d8d8719315fd..5f7f28c9edcb 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -267,6 +267,7 @@ struct ieee802154_llsec_key { struct ieee802154_llsec_key_entry { struct list_head list; + struct rcu_head rcu; struct ieee802154_llsec_key_id id; struct ieee802154_llsec_key *key; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c2b15f7e5516..080968d6e6c5 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -346,4 +346,12 @@ static inline bool inet_csk_has_ulp(struct sock *sk) return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; } +static inline void inet_init_csk_locks(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + spin_lock_init(&icsk->icsk_accept_queue.rskq_lock); + spin_lock_init(&icsk->icsk_accept_queue.fastopenq.lock); +} + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index c2432c2addc8..c98890e21401 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -270,11 +270,6 @@ struct inet_sock { #define IP_CMSG_CHECKSUM BIT(7) #define IP_CMSG_RECVFRAGSIZE BIT(8) -static inline bool sk_is_inet(struct sock *sk) -{ - return sk->sk_family == AF_INET || sk->sk_family == AF_INET6; -} - /** * sk_to_full_sk - Access to a full socket * @sk: pointer to a socket diff --git a/include/net/ip.h b/include/net/ip.h index c83c09c65623..4f11f7df7dd6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -752,7 +752,7 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); * Functions provided by ip_sockglue.c */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst); void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index c48186bf4737..21da31e1dff5 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -85,6 +85,11 @@ struct ipv6_bpf_stub { sockptr_t optval, unsigned int optlen); int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); + int (*ipv6_dev_get_saddr)(struct net *net, + const struct net_device *dst_dev, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 49aa79c7b278..581cd37aa98b 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -262,8 +262,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, */ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); + memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); } /** @@ -275,8 +274,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) */ static inline void llc_pdu_decode_da(struct sk_buff *skb, u8 *da) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); + memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); } /** diff --git a/include/net/mctp.h b/include/net/mctp.h index 82800d521c3d..7ed84054f462 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -249,6 +249,7 @@ struct mctp_route { struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, mctp_eid_t daddr); +/* always takes ownership of skb */ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 6a2019aaa464..3dbf947285be 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -125,6 +125,12 @@ struct nf_conn { union nf_conntrack_proto proto; }; +static inline struct nf_conn * +nf_ct_to_nf_conn(const struct nf_conntrack *nfct) +{ + return container_of(nfct, struct nf_conn, ct_general); +} + static inline struct nf_conn * nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) { @@ -175,6 +181,8 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) void nf_ct_destroy(struct nf_conntrack *nfct); +void nf_conntrack_tcp_set_closing(struct nf_conn *ct); + /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index dde4dd9c4012..4a767b3d20b9 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -274,8 +274,8 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, flow_table->type->put(flow_table); } -int flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route); +void flow_offload_route_init(struct flow_offload *flow, + struct nf_flow_route *route); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); void flow_offload_refresh(struct nf_flowtable *flow_table, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c726da3b7d68..2fa344cb66f6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1286,6 +1286,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, * @type: stateful object numeric type * @owner: module owner * @maxattr: maximum netlink attribute + * @family: address family for AF-specific object types * @policy: netlink attribute policy */ struct nft_object_type { @@ -1295,6 +1296,7 @@ struct nft_object_type { struct list_head list; u32 type; unsigned int maxattr; + u8 family; struct module *owner; const struct nla_policy *policy; }; diff --git a/include/net/sock.h b/include/net/sock.h index 6b51e85ae69e..579732d47dfc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2824,9 +2824,25 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) &skb_shinfo(skb)->tskey); } +static inline bool sk_is_inet(const struct sock *sk) +{ + int family = READ_ONCE(sk->sk_family); + + return family == AF_INET || family == AF_INET6; +} + static inline bool sk_is_tcp(const struct sock *sk) { - return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; + return sk_is_inet(sk) && + sk->sk_type == SOCK_STREAM && + sk->sk_protocol == IPPROTO_TCP; +} + +static inline bool sk_is_udp(const struct sock *sk) +{ + return sk_is_inet(sk) && + sk->sk_type == SOCK_DGRAM && + sk->sk_protocol == IPPROTO_UDP; } static inline bool sk_is_stream_unix(const struct sock *sk) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 7dcdc97c0bc3..a3d8f013adcd 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -303,6 +303,9 @@ void switchdev_deferred_process(void); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct netlink_ext_ack *extack); +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj); int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct netlink_ext_ack *extack); diff --git a/include/net/tcp.h b/include/net/tcp.h index 4c838f7290dd..8ea1fba84eff 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2290,7 +2290,7 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ - int (*get_info)(const struct sock *sk, struct sk_buff *skb); + int (*get_info)(struct sock *sk, struct sk_buff *skb); size_t (*get_info_size)(const struct sock *sk); /* clone ulp */ void (*clone)(const struct request_sock *req, struct sock *newsk, diff --git a/include/net/tls.h b/include/net/tls.h index c36bf4c50027..899c863aba02 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -108,9 +108,6 @@ struct tls_sw_context_tx { struct tls_rec *open_rec; struct list_head tx_list; atomic_t encrypt_pending; - /* protect crypto_wait with encrypt_pending */ - spinlock_t encrypt_compl_lock; - int async_notify; u8 async_capable:1; #define BIT_TX_SCHEDULED 0 @@ -147,8 +144,6 @@ struct tls_sw_context_rx { struct tls_strparser strp; atomic_t decrypt_pending; - /* protect crypto_wait with decrypt_pending*/ - spinlock_t decrypt_compl_lock; struct sk_buff_head async_hold; struct wait_queue_head wq; }; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index fdc31fdb612d..a64713fe5264 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -100,10 +100,6 @@ struct scsi_vpd { unsigned char data[]; }; -enum scsi_vpd_parameters { - SCSI_VPD_HEADER_SIZE = 4, -}; - struct scsi_device { struct Scsi_Host *host; struct request_queue *request_queue; @@ -208,6 +204,7 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ + unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ @@ -483,28 +480,51 @@ extern const char *scsi_device_state_name(enum scsi_device_state); extern int scsi_is_sdev_device(const struct device *); extern int scsi_is_target_device(const struct device *); extern void scsi_sanitize_inquiry_string(unsigned char *s, int len); -extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, - int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, struct scsi_sense_hdr *sshdr, - int timeout, int retries, blk_opf_t flags, - req_flags_t rq_flags, int *resid); + +/* Optional arguments to scsi_execute_cmd */ +struct scsi_exec_args { + unsigned char *sense; /* sense buffer */ + unsigned int sense_len; /* sense buffer len */ + struct scsi_sense_hdr *sshdr; /* decoded sense header */ + blk_mq_req_flags_t req_flags; /* BLK_MQ_REQ flags */ + int *resid; /* residual length */ +}; + +int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, + blk_opf_t opf, void *buffer, unsigned int bufflen, + int timeout, int retries, + const struct scsi_exec_args *args); + /* Make sure any sense buffer is the correct size. */ -#define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \ - sshdr, timeout, retries, flags, rq_flags, resid) \ +#define scsi_execute(_sdev, _cmd, _data_dir, _buffer, _bufflen, _sense, \ + _sshdr, _timeout, _retries, _flags, _rq_flags, \ + _resid) \ ({ \ - BUILD_BUG_ON((sense) != NULL && \ - sizeof(sense) != SCSI_SENSE_BUFFERSIZE); \ - __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, \ - sense, sshdr, timeout, retries, flags, rq_flags, \ - resid); \ + scsi_execute_cmd(_sdev, _cmd, (_data_dir == DMA_TO_DEVICE ? \ + REQ_OP_DRV_OUT : REQ_OP_DRV_IN) | _flags, \ + _buffer, _bufflen, _timeout, _retries, \ + &(struct scsi_exec_args) { \ + .sense = _sense, \ + .sshdr = _sshdr, \ + .req_flags = _rq_flags & RQF_PM ? \ + BLK_MQ_REQ_PM : 0, \ + .resid = _resid, \ + }); \ }) + static inline int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid) { - return scsi_execute(sdev, cmd, data_direction, buffer, - bufflen, NULL, sshdr, timeout, retries, 0, 0, resid); + return scsi_execute_cmd(sdev, cmd, + data_direction == DMA_TO_DEVICE ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, buffer, + bufflen, timeout, retries, + &(struct scsi_exec_args) { + .sshdr = sshdr, + .resid = resid, + }); } extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 4ce1988b2ba0..f40915d2ecee 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -12,6 +12,7 @@ struct request; struct scsi_driver { struct device_driver gendrv; + int (*resume)(struct device *); void (*rescan)(struct device *); blk_status_t (*init_command)(struct scsi_cmnd *); void (*uninit_command)(struct scsi_cmnd *); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 71def41b1ad7..149b63e3534a 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -752,6 +752,7 @@ extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *, struct device *, struct device *); extern void scsi_scan_host(struct Scsi_Host *); +extern int scsi_resume_device(struct scsi_device *sdev); extern int scsi_rescan_device(struct scsi_device *sdev); extern void scsi_remove_host(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index a3995925cb05..1f4258308b96 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -81,14 +81,14 @@ TRACE_EVENT(qdisc_reset, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; @@ -106,14 +106,14 @@ TRACE_EVENT(qdisc_destroy, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index c9048f3e471b..3f121eed369e 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -13,7 +13,7 @@ #include -#include +#include /** ** GSS-API related trace events diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index fcd3b3f1020a..8f461e04e5f0 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -15,8 +15,8 @@ #include #include -#include -#include +#include +#include /** ** Event classes diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index f48f2ab9d238..ffe2679a13ce 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -14,7 +14,7 @@ #include #include -#include +#include TRACE_DEFINE_ENUM(SOCK_STREAM); TRACE_DEFINE_ENUM(SOCK_DGRAM); diff --git a/include/trace/events/fs.h b/include/trace/misc/fs.h similarity index 100% rename from include/trace/events/fs.h rename to include/trace/misc/fs.h diff --git a/include/trace/events/nfs.h b/include/trace/misc/nfs.h similarity index 96% rename from include/trace/events/nfs.h rename to include/trace/misc/nfs.h index 09ffdbb04134..0d9d48dca38a 100644 --- a/include/trace/events/nfs.h +++ b/include/trace/misc/nfs.h @@ -360,6 +360,18 @@ TRACE_DEFINE_ENUM(IOMODE_ANY); { IOMODE_RW, "RW" }, \ { IOMODE_ANY, "ANY" }) +#define show_rca_mask(x) \ + __print_flags(x, "|", \ + { BIT(RCA4_TYPE_MASK_RDATA_DLG), "RDATA_DLG" }, \ + { BIT(RCA4_TYPE_MASK_WDATA_DLG), "WDATA_DLG" }, \ + { BIT(RCA4_TYPE_MASK_DIR_DLG), "DIR_DLG" }, \ + { BIT(RCA4_TYPE_MASK_FILE_LAYOUT), "FILE_LAYOUT" }, \ + { BIT(RCA4_TYPE_MASK_BLK_LAYOUT), "BLK_LAYOUT" }, \ + { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MIN), "OBJ_LAYOUT_MIN" }, \ + { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MAX), "OBJ_LAYOUT_MAX" }, \ + { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MIN), "OTHER_LAYOUT_MIN" }, \ + { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MAX), "OTHER_LAYOUT_MAX" }) + #define show_nfs4_seq4_status(x) \ __print_flags(x, "|", \ { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ diff --git a/include/trace/events/rdma.h b/include/trace/misc/rdma.h similarity index 100% rename from include/trace/events/rdma.h rename to include/trace/misc/rdma.h diff --git a/include/trace/events/sunrpc_base.h b/include/trace/misc/sunrpc.h similarity index 100% rename from include/trace/events/sunrpc_base.h rename to include/trace/misc/sunrpc.h diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 201dc77ebbd7..d5d2183730b9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3109,6 +3109,10 @@ union bpf_attr { * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. + * **BPF_FIB_LOOKUP_TBID** + * Used with BPF_FIB_LOOKUP_DIRECT. + * Use the routing table ID present in *params*->tbid + * for the fib lookup. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). @@ -3117,6 +3121,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6687,6 +6696,8 @@ enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6699,6 +6710,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -6733,6 +6745,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ @@ -6747,9 +6762,19 @@ struct bpf_fib_lookup { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; + union { + struct { + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + /* input: when accompanied with the + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a + * specific routing table to use for the fib lookup. + */ + __u32 tbid; + }; + __u8 smac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */ }; diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index d4d4fa0bb362..24b54635bae9 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -585,6 +585,9 @@ struct btrfs_ioctl_clone_range_args { */ #define BTRFS_DEFRAG_RANGE_COMPRESS 1 #define BTRFS_DEFRAG_RANGE_START_IO 2 +#define BTRFS_DEFRAG_RANGE_FLAGS_SUPP (BTRFS_DEFRAG_RANGE_COMPRESS | \ + BTRFS_DEFRAG_RANGE_START_IO) + struct btrfs_ioctl_defrag_range_args { /* start of the defrag operation */ __u64 start; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index c4c53a9ab959..ff8d21f9e95b 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -145,7 +145,7 @@ struct in6_flowlabel_req { #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ -#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ +#define IPV6_TLV_IOAM 49 /* RFC 9486 */ #define IPV6_TLV_JUMBO 194 #define IPV6_TLV_HAO 201 /* home address option */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index af8f4c304d27..707af820f1a9 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -266,9 +266,11 @@ enum nft_rule_attributes { /** * enum nft_rule_compat_flags - nf_tables rule compat flags * + * @NFT_RULE_COMPAT_F_UNUSED: unused * @NFT_RULE_COMPAT_F_INV: invert the check result */ enum nft_rule_compat_flags { + NFT_RULE_COMPAT_F_UNUSED = (1 << 0), NFT_RULE_COMPAT_F_INV = (1 << 1), NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV, }; diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h index a7085e092d34..3a0cde4dcf33 100644 --- a/include/uapi/rdma/irdma-abi.h +++ b/include/uapi/rdma/irdma-abi.h @@ -22,10 +22,15 @@ enum irdma_memreg_type { IRDMA_MEMREG_TYPE_CQ = 2, }; +enum { + IRDMA_ALLOC_UCTX_USE_RAW_ATTR = 1 << 0, +}; + struct irdma_alloc_ucontext_req { __u32 rsvd32; __u8 userspace_ver; __u8 rsvd8[3]; + __aligned_u64 comp_mask; }; struct irdma_alloc_ucontext_resp { @@ -46,6 +51,7 @@ struct irdma_alloc_ucontext_resp { __u16 max_hw_sq_chunk; __u8 hw_rev; __u8 rsvd2; + __aligned_u64 comp_mask; }; struct irdma_alloc_pd_resp { diff --git a/init/Kconfig b/init/Kconfig index 6afa096df69d..7e6596329c54 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -89,6 +89,15 @@ config CC_HAS_ASM_GOTO_TIED_OUTPUT # Detect buggy gcc and clang, fixed in gcc-11 clang-14. def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) +config GCC_ASM_GOTO_OUTPUT_WORKAROUND + bool + depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT + # Fixed in GCC 14, 13.3, 12.4 and 11.5 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 + default y if GCC_VERSION < 110500 + default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400 + default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300 + config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) @@ -893,14 +902,14 @@ config CC_IMPLICIT_FALLTHROUGH default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5) default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough) -# Currently, disable gcc-11+ array-bounds globally. +# Currently, disable gcc-10+ array-bounds globally. # It's still broken in gcc-13, so no upper bound yet. -config GCC11_NO_ARRAY_BOUNDS +config GCC10_NO_ARRAY_BOUNDS def_bool y config CC_NO_ARRAY_BOUNDS bool - default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS + default y if CC_IS_GCC && GCC_VERSION >= 100000 && GCC10_NO_ARRAY_BOUNDS # # For architectures that know their GCC __int128 support is sound diff --git a/init/initramfs.c b/init/initramfs.c index b3c020016065..0346f95f0774 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -684,7 +684,7 @@ static void __init populate_initrd_image(char *err) printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n", err); - file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700); + file = filp_open("/initrd.image", O_WRONLY|O_CREAT|O_LARGEFILE, 0700); if (IS_ERR(file)) return; diff --git a/init/main.c b/init/main.c index 10f5110b6672..7e142f48fd2d 100644 --- a/init/main.c +++ b/init/main.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include @@ -1681,11 +1682,11 @@ static void mark_readonly(void) if (rodata_enabled) { /* * load_module() results in W+X mappings, which are cleaned - * up with call_rcu(). Let's make sure that queued work is + * up with init_free_wq. Let's make sure that queued work is * flushed so that we don't hit false positives looking for * insecure pages which are W+X. */ - rcu_barrier(); + flush_module_init_free_work(); mark_rodata_ro(); rodata_test(); } else diff --git a/io_uring/filetable.c b/io_uring/filetable.c index b80614e7d605..4660cb89ea9f 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -95,12 +95,10 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, needs_switch = true; } - ret = io_scm_file_account(ctx, file); - if (!ret) { - *io_get_tag_slot(ctx->file_data, slot_index) = 0; - io_fixed_file_set(file_slot, file); - io_file_bitmap_set(&ctx->file_table, slot_index); - } + *io_get_tag_slot(ctx->file_data, slot_index) = 0; + io_fixed_file_set(file_slot, file); + io_file_bitmap_set(&ctx->file_table, slot_index); + return 0; err: if (needs_switch) io_rsrc_node_switch(ctx, ctx->file_data); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 35894955b454..415248c1f82c 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -60,7 +60,6 @@ #include #include #include -#include #include #include #include @@ -153,19 +152,6 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx); static struct kmem_cache *req_cachep; -struct sock *io_uring_get_socket(struct file *file) -{ -#if defined(CONFIG_UNIX) - if (io_is_uring_fops(file)) { - struct io_ring_ctx *ctx = file->private_data; - - return ctx->ring_sock->sk; - } -#endif - return NULL; -} -EXPORT_SYMBOL(io_uring_get_socket); - static inline void io_submit_flush_completions(struct io_ring_ctx *ctx) { if (!wq_list_empty(&ctx->submit_state.compl_reqs)) @@ -2641,12 +2627,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist)); -#if defined(CONFIG_UNIX) - if (ctx->ring_sock) { - ctx->ring_sock->file = NULL; /* so that iput() is called */ - sock_release(ctx->ring_sock); - } -#endif WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); if (ctx->mm_account) { @@ -3451,32 +3431,12 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) /* * Allocate an anonymous fd, this is what constitutes the application * visible backing of an io_uring instance. The application mmaps this - * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, - * we have to tie this fd to a socket for file garbage collection purposes. + * fd to gain access to the SQ/CQ ring details. */ static struct file *io_uring_get_file(struct io_ring_ctx *ctx) { - struct file *file; -#if defined(CONFIG_UNIX) - int ret; - - ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, - &ctx->ring_sock); - if (ret) - return ERR_PTR(ret); -#endif - - file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, + return anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, O_RDWR | O_CLOEXEC, NULL); -#if defined(CONFIG_UNIX) - if (IS_ERR(file)) { - sock_release(ctx->ring_sock); - ctx->ring_sock = NULL; - } else { - ctx->ring_sock->file = file; - } -#endif - return file; } static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 019600570ee4..59e6f755f12c 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -52,7 +52,6 @@ static inline bool io_req_ffs_set(struct io_kiocb *req) } void __io_req_task_work_add(struct io_kiocb *req, bool allow_local); -bool io_is_uring_fops(struct file *file); bool io_alloc_async_data(struct io_kiocb *req); void io_req_task_queue(struct io_kiocb *req); void io_queue_iowq(struct io_kiocb *req, bool *dont_use); diff --git a/io_uring/net.c b/io_uring/net.c index 67f09a40bcb2..b1b564c04d1e 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -183,16 +183,115 @@ static int io_setup_async_msg(struct io_kiocb *req, return -EAGAIN; } -static int io_sendmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) +#ifdef CONFIG_COMPAT +static int io_compat_msg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg, + struct compat_msghdr *msg, int ddir) +{ + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct compat_iovec __user *uiov; + int ret; + + if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg))) + return -EFAULT; + + uiov = compat_ptr(msg->msg_iov); + if (req->flags & REQ_F_BUFFER_SELECT) { + compat_ssize_t clen; + + iomsg->free_iov = NULL; + if (msg->msg_iovlen == 0) { + sr->len = 0; + } else if (msg->msg_iovlen > 1) { + return -EINVAL; + } else { + if (!access_ok(uiov, sizeof(*uiov))) + return -EFAULT; + if (__get_user(clen, &uiov->iov_len)) + return -EFAULT; + if (clen < 0) + return -EINVAL; + sr->len = clen; + } + + return 0; + } + + iomsg->free_iov = iomsg->fast_iov; + ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen, + UIO_FASTIOV, &iomsg->free_iov, + &iomsg->msg.msg_iter, true); + if (unlikely(ret < 0)) + return ret; + + return 0; +} +#endif + +static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, + struct user_msghdr *msg, int ddir) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); int ret; - iomsg->msg.msg_name = &iomsg->addr; + if (copy_from_user(msg, sr->umsg, sizeof(*sr->umsg))) + return -EFAULT; + + if (req->flags & REQ_F_BUFFER_SELECT) { + if (msg->msg_iovlen == 0) { + sr->len = iomsg->fast_iov[0].iov_len = 0; + iomsg->fast_iov[0].iov_base = NULL; + iomsg->free_iov = NULL; + } else if (msg->msg_iovlen > 1) { + return -EINVAL; + } else { + if (copy_from_user(iomsg->fast_iov, msg->msg_iov, + sizeof(*msg->msg_iov))) + return -EFAULT; + sr->len = iomsg->fast_iov[0].iov_len; + iomsg->free_iov = NULL; + } + + return 0; + } + iomsg->free_iov = iomsg->fast_iov; - ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags, - &iomsg->free_iov); + ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, UIO_FASTIOV, + &iomsg->free_iov, &iomsg->msg.msg_iter, false); + if (unlikely(ret < 0)) + return ret; + + return 0; +} + +static int io_sendmsg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg) +{ + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct user_msghdr msg; + int ret; + + iomsg->msg.msg_name = &iomsg->addr; + iomsg->msg.msg_iter.nr_segs = 0; + +#ifdef CONFIG_COMPAT + if (unlikely(req->ctx->compat)) { + struct compat_msghdr cmsg; + + ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE); + if (unlikely(ret)) + return ret; + + return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL); + } +#endif + + ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE); + if (unlikely(ret)) + return ret; + + ret = __copy_msghdr(&iomsg->msg, &msg, NULL); + /* save msg_control as sys_sendmsg() overwrites it */ sr->msg_control = iomsg->msg.msg_control_user; return ret; @@ -415,142 +514,77 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } -static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg) +static int io_recvmsg_mshot_prep(struct io_kiocb *req, + struct io_async_msghdr *iomsg, + int namelen, size_t controllen) { - int hdr; + if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) == + (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) { + int hdr; - if (iomsg->namelen < 0) - return true; - if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), - iomsg->namelen, &hdr)) - return true; - if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr)) - return true; + if (unlikely(namelen < 0)) + return -EOVERFLOW; + if (check_add_overflow(sizeof(struct io_uring_recvmsg_out), + namelen, &hdr)) + return -EOVERFLOW; + if (check_add_overflow(hdr, controllen, &hdr)) + return -EOVERFLOW; - return false; -} - -static int __io_recvmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct user_msghdr msg; - int ret; - - if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg))) - return -EFAULT; - - ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); - if (ret) - return ret; - - if (req->flags & REQ_F_BUFFER_SELECT) { - if (msg.msg_iovlen == 0) { - sr->len = iomsg->fast_iov[0].iov_len = 0; - iomsg->fast_iov[0].iov_base = NULL; - iomsg->free_iov = NULL; - } else if (msg.msg_iovlen > 1) { - return -EINVAL; - } else { - if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov))) - return -EFAULT; - sr->len = iomsg->fast_iov[0].iov_len; - iomsg->free_iov = NULL; - } - - if (req->flags & REQ_F_APOLL_MULTISHOT) { - iomsg->namelen = msg.msg_namelen; - iomsg->controllen = msg.msg_controllen; - if (io_recvmsg_multishot_overflow(iomsg)) - return -EOVERFLOW; - } - } else { - iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(ITER_DEST, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, - &iomsg->free_iov, &iomsg->msg.msg_iter, - false); - if (ret > 0) - ret = 0; - } - - return ret; -} - -#ifdef CONFIG_COMPAT -static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct compat_msghdr msg; - struct compat_iovec __user *uiov; - int ret; - - if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg))) - return -EFAULT; - - ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); - if (ret) - return ret; - - uiov = compat_ptr(msg.msg_iov); - if (req->flags & REQ_F_BUFFER_SELECT) { - compat_ssize_t clen; - - iomsg->free_iov = NULL; - if (msg.msg_iovlen == 0) { - sr->len = 0; - } else if (msg.msg_iovlen > 1) { - return -EINVAL; - } else { - if (!access_ok(uiov, sizeof(*uiov))) - return -EFAULT; - if (__get_user(clen, &uiov->iov_len)) - return -EFAULT; - if (clen < 0) - return -EINVAL; - sr->len = clen; - } - - if (req->flags & REQ_F_APOLL_MULTISHOT) { - iomsg->namelen = msg.msg_namelen; - iomsg->controllen = msg.msg_controllen; - if (io_recvmsg_multishot_overflow(iomsg)) - return -EOVERFLOW; - } - } else { - iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(ITER_DEST, (struct iovec __user *)uiov, msg.msg_iovlen, - UIO_FASTIOV, &iomsg->free_iov, - &iomsg->msg.msg_iter, true); - if (ret < 0) - return ret; + iomsg->namelen = namelen; + iomsg->controllen = controllen; + return 0; } return 0; } -#endif static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { + struct user_msghdr msg; + int ret; + iomsg->msg.msg_name = &iomsg->addr; iomsg->msg.msg_iter.nr_segs = 0; #ifdef CONFIG_COMPAT - if (req->ctx->compat) - return __io_compat_recvmsg_copy_hdr(req, iomsg); + if (unlikely(req->ctx->compat)) { + struct compat_msghdr cmsg; + + ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST); + if (unlikely(ret)) + return ret; + + ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr); + if (unlikely(ret)) + return ret; + + return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen, + cmsg.msg_controllen); + } #endif - return __io_recvmsg_copy_hdr(req, iomsg); + ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST); + if (unlikely(ret)) + return ret; + + ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); + if (unlikely(ret)) + return ret; + + return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen, + msg.msg_controllen); } int io_recvmsg_prep_async(struct io_kiocb *req) { + struct io_async_msghdr *iomsg; int ret; if (!io_msg_alloc_async_prep(req)) return -ENOMEM; - ret = io_recvmsg_copy_hdr(req, req->async_data); + iomsg = req->async_data; + ret = io_recvmsg_copy_hdr(req, iomsg); if (!ret) req->flags |= REQ_F_NEED_CLEANUP; return ret; @@ -842,7 +876,8 @@ retry_multishot: kfree(kmsg->free_iov); io_netmsg_recycle(req, issue_flags); req->flags &= ~REQ_F_NEED_CLEANUP; - } + } else if (ret == -EAGAIN) + return io_setup_async_msg(req, kmsg, issue_flags); return ret; } @@ -875,6 +910,7 @@ retry_multishot: if (!buf) return -ENOBUFS; sr->buf = buf; + sr->len = len; } ret = import_single_range(ITER_DEST, sr->buf, len, &iov, &msg.msg_iter); @@ -1325,7 +1361,7 @@ retry: * has already been done */ if (issue_flags & IO_URING_F_MULTISHOT) - ret = IOU_ISSUE_SKIP_COMPLETE; + return IOU_ISSUE_SKIP_COMPLETE; return ret; } if (ret == -ERESTARTSYS) @@ -1349,7 +1385,8 @@ retry: if (io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false)) goto retry; - return -ECANCELED; + io_req_set_res(req, ret, 0); + return IOU_STOP_MULTISHOT; } int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 7ada0339b387..ac658cfa89c6 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -494,11 +494,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } - err = io_scm_file_account(ctx, file); - if (err) { - fput(file); - break; - } *io_get_tag_slot(data, i) = tag; io_fixed_file_set(file_slot, file); io_file_bitmap_set(&ctx->file_table, i); @@ -762,22 +757,12 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx) for (i = 0; i < ctx->nr_user_files; i++) { struct file *file = io_file_from_index(&ctx->file_table, i); - /* skip scm accounted files, they'll be freed by ->ring_sock */ - if (!file || io_file_need_scm(file)) + if (!file) continue; io_file_bitmap_clear(&ctx->file_table, i); fput(file); } -#if defined(CONFIG_UNIX) - if (ctx->ring_sock) { - struct sock *sock = ctx->ring_sock->sk; - struct sk_buff *skb; - - while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL) - kfree_skb(skb); - } -#endif io_free_file_tables(&ctx->file_table); io_file_table_set_alloc_range(ctx, 0, 0); io_rsrc_data_free(ctx->file_data); @@ -805,134 +790,11 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx) return ret; } -/* - * Ensure the UNIX gc is aware of our file set, so we are certain that - * the io_uring can be safely unregistered on process exit, even if we have - * loops in the file referencing. We account only files that can hold other - * files because otherwise they can't form a loop and so are not interesting - * for GC. - */ -int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) -{ -#if defined(CONFIG_UNIX) - struct sock *sk = ctx->ring_sock->sk; - struct sk_buff_head *head = &sk->sk_receive_queue; - struct scm_fp_list *fpl; - struct sk_buff *skb; - - if (likely(!io_file_need_scm(file))) - return 0; - - /* - * See if we can merge this file into an existing skb SCM_RIGHTS - * file set. If there's no room, fall back to allocating a new skb - * and filling it in. - */ - spin_lock_irq(&head->lock); - skb = skb_peek(head); - if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD) - __skb_unlink(skb, head); - else - skb = NULL; - spin_unlock_irq(&head->lock); - - if (!skb) { - fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); - if (!fpl) - return -ENOMEM; - - skb = alloc_skb(0, GFP_KERNEL); - if (!skb) { - kfree(fpl); - return -ENOMEM; - } - - fpl->user = get_uid(current_user()); - fpl->max = SCM_MAX_FD; - fpl->count = 0; - - UNIXCB(skb).fp = fpl; - skb->sk = sk; - skb->scm_io_uring = 1; - skb->destructor = unix_destruct_scm; - refcount_add(skb->truesize, &sk->sk_wmem_alloc); - } - - fpl = UNIXCB(skb).fp; - fpl->fp[fpl->count++] = get_file(file); - unix_inflight(fpl->user, file); - skb_queue_head(head, skb); - fput(file); -#endif - return 0; -} - static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) { struct file *file = prsrc->file; -#if defined(CONFIG_UNIX) - struct sock *sock = ctx->ring_sock->sk; - struct sk_buff_head list, *head = &sock->sk_receive_queue; - struct sk_buff *skb; - int i; - if (!io_file_need_scm(file)) { - fput(file); - return; - } - - __skb_queue_head_init(&list); - - /* - * Find the skb that holds this file in its SCM_RIGHTS. When found, - * remove this entry and rearrange the file array. - */ - skb = skb_dequeue(head); - while (skb) { - struct scm_fp_list *fp; - - fp = UNIXCB(skb).fp; - for (i = 0; i < fp->count; i++) { - int left; - - if (fp->fp[i] != file) - continue; - - unix_notinflight(fp->user, fp->fp[i]); - left = fp->count - 1 - i; - if (left) { - memmove(&fp->fp[i], &fp->fp[i + 1], - left * sizeof(struct file *)); - } - fp->count--; - if (!fp->count) { - kfree_skb(skb); - skb = NULL; - } else { - __skb_queue_tail(&list, skb); - } - fput(file); - file = NULL; - break; - } - - if (!file) - break; - - __skb_queue_tail(&list, skb); - - skb = skb_dequeue(head); - } - - if (skb_peek(&list)) { - spin_lock_irq(&head->lock); - while ((skb = __skb_dequeue(&list)) != NULL) - __skb_queue_tail(head, skb); - spin_unlock_irq(&head->lock); - } -#else fput(file); -#endif } int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, @@ -986,21 +848,12 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, goto fail; /* - * Don't allow io_uring instances to be registered. If UNIX - * isn't enabled, then this causes a reference cycle and this - * instance can never get freed. If UNIX is enabled we'll - * handle it just fine, but there's still no point in allowing - * a ring fd as it doesn't support regular read/write anyway. + * Don't allow io_uring instances to be registered. */ if (io_is_uring_fops(file)) { fput(file); goto fail; } - ret = io_scm_file_account(ctx, file); - if (ret) { - fput(file); - goto fail; - } file_slot = io_fixed_file_slot(&ctx->file_table, i); io_fixed_file_set(file_slot, file); io_file_bitmap_set(&ctx->file_table, i); diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index acaf8dad0540..85f145607c62 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -77,21 +77,6 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx); int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args, u64 __user *tags); -int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file); - -static inline bool io_file_need_scm(struct file *filp) -{ - return false; -} - -static inline int io_scm_file_account(struct io_ring_ctx *ctx, - struct file *file) -{ - if (likely(!io_file_need_scm(file))) - return 0; - return __io_scm_file_account(ctx, file); -} - int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args); int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, diff --git a/kernel/async.c b/kernel/async.c index b2c4ba5686ee..673bba6bdf3a 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -145,6 +145,39 @@ static void async_run_entry_fn(struct work_struct *work) wake_up(&async_done); } +static async_cookie_t __async_schedule_node_domain(async_func_t func, + void *data, int node, + struct async_domain *domain, + struct async_entry *entry) +{ + async_cookie_t newcookie; + unsigned long flags; + + INIT_LIST_HEAD(&entry->domain_list); + INIT_LIST_HEAD(&entry->global_list); + INIT_WORK(&entry->work, async_run_entry_fn); + entry->func = func; + entry->data = data; + entry->domain = domain; + + spin_lock_irqsave(&async_lock, flags); + + /* allocate cookie and queue */ + newcookie = entry->cookie = next_cookie++; + + list_add_tail(&entry->domain_list, &domain->pending); + if (domain->registered) + list_add_tail(&entry->global_list, &async_global_pending); + + atomic_inc(&entry_count); + spin_unlock_irqrestore(&async_lock, flags); + + /* schedule for execution */ + queue_work_node(node, system_unbound_wq, &entry->work); + + return newcookie; +} + /** * async_schedule_node_domain - NUMA specific version of async_schedule_domain * @func: function to execute asynchronously @@ -186,29 +219,8 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, func(data, newcookie); return newcookie; } - INIT_LIST_HEAD(&entry->domain_list); - INIT_LIST_HEAD(&entry->global_list); - INIT_WORK(&entry->work, async_run_entry_fn); - entry->func = func; - entry->data = data; - entry->domain = domain; - spin_lock_irqsave(&async_lock, flags); - - /* allocate cookie and queue */ - newcookie = entry->cookie = next_cookie++; - - list_add_tail(&entry->domain_list, &domain->pending); - if (domain->registered) - list_add_tail(&entry->global_list, &async_global_pending); - - atomic_inc(&entry_count); - spin_unlock_irqrestore(&async_lock, flags); - - /* schedule for execution */ - queue_work_node(node, system_unbound_wq, &entry->work); - - return newcookie; + return __async_schedule_node_domain(func, data, node, domain, entry); } EXPORT_SYMBOL_GPL(async_schedule_node_domain); @@ -231,6 +243,35 @@ async_cookie_t async_schedule_node(async_func_t func, void *data, int node) } EXPORT_SYMBOL_GPL(async_schedule_node); +/** + * async_schedule_dev_nocall - A simplified variant of async_schedule_dev() + * @func: function to execute asynchronously + * @dev: device argument to be passed to function + * + * @dev is used as both the argument for the function and to provide NUMA + * context for where to run the function. + * + * If the asynchronous execution of @func is scheduled successfully, return + * true. Otherwise, do nothing and return false, unlike async_schedule_dev() + * that will run the function synchronously then. + */ +bool async_schedule_dev_nocall(async_func_t func, struct device *dev) +{ + struct async_entry *entry; + + entry = kzalloc(sizeof(struct async_entry), GFP_KERNEL); + + /* Give up if there is no memory or too much work. */ + if (!entry || atomic_read(&entry_count) > MAX_WORK) { + kfree(entry); + return false; + } + + __async_schedule_node_domain(func, dev, dev_to_node(dev), + &async_dfl_domain, entry); + return true; +} + /** * async_synchronize_full - synchronize all asynchronous function calls * diff --git a/kernel/audit.c b/kernel/audit.c index 9bc0b0301198..99127521cda8 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -488,15 +488,19 @@ static void auditd_conn_free(struct rcu_head *rcu) * @pid: auditd PID * @portid: auditd netlink portid * @net: auditd network namespace pointer + * @skb: the netlink command from the audit daemon + * @ack: netlink ack flag, cleared if ack'd here * * Description: * This function will obtain and drop network namespace references as * necessary. Returns zero on success, negative values on failure. */ -static int auditd_set(struct pid *pid, u32 portid, struct net *net) +static int auditd_set(struct pid *pid, u32 portid, struct net *net, + struct sk_buff *skb, bool *ack) { unsigned long flags; struct auditd_connection *ac_old, *ac_new; + struct nlmsghdr *nlh; if (!pid || !net) return -EINVAL; @@ -508,6 +512,13 @@ static int auditd_set(struct pid *pid, u32 portid, struct net *net) ac_new->portid = portid; ac_new->net = get_net(net); + /* send the ack now to avoid a race with the queue backlog */ + if (*ack) { + nlh = nlmsg_hdr(skb); + netlink_ack(skb, nlh, 0, NULL); + *ack = false; + } + spin_lock_irqsave(&auditd_conn_lock, flags); ac_old = rcu_dereference_protected(auditd_conn, lockdep_is_held(&auditd_conn_lock)); @@ -1201,7 +1212,8 @@ static int audit_replace(struct pid *pid) return auditd_send_unicast_skb(skb); } -static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + bool *ack) { u32 seq; void *data; @@ -1294,7 +1306,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) /* register a new auditd connection */ err = auditd_set(req_pid, NETLINK_CB(skb).portid, - sock_net(NETLINK_CB(skb).sk)); + sock_net(NETLINK_CB(skb).sk), + skb, ack); if (audit_enabled != AUDIT_OFF) audit_log_config_change("audit_pid", new_pid, @@ -1539,9 +1552,10 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) * Parse the provided skb and deal with any messages that may be present, * malformed skbs are discarded. */ -static void audit_receive(struct sk_buff *skb) +static void audit_receive(struct sk_buff *skb) { struct nlmsghdr *nlh; + bool ack; /* * len MUST be signed for nlmsg_next to be able to dec it below 0 * if the nlmsg_len was not aligned @@ -1554,9 +1568,12 @@ static void audit_receive(struct sk_buff *skb) audit_ctl_lock(); while (nlmsg_ok(nlh, len)) { - err = audit_receive_msg(skb, nlh); - /* if err or if this message says it wants a response */ - if (err || (nlh->nlmsg_flags & NLM_F_ACK)) + ack = nlh->nlmsg_flags & NLM_F_ACK; + err = audit_receive_msg(skb, nlh, &ack); + + /* send an ack if the user asked for one and audit_receive_msg + * didn't already do it, or if there was an error. */ + if (ack || err) netlink_ack(skb, nlh, err, NULL); nlh = nlmsg_next(nlh, &len); diff --git a/kernel/bounds.c b/kernel/bounds.c index b529182e8b04..c5a9fcd2d622 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -19,7 +19,7 @@ int main(void) DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); #ifdef CONFIG_SMP - DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); + DEFINE(NR_CPUS_BITS, bits_per(CONFIG_NR_CPUS)); #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); #ifdef CONFIG_LRU_GEN diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 76bf1de26115..44abf88e1bb0 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -857,7 +857,12 @@ static LIST_HEAD(pack_list); * CONFIG_MMU=n. Use PAGE_SIZE in these cases. */ #ifdef PMD_SIZE -#define BPF_PROG_PACK_SIZE (PMD_SIZE * num_possible_nodes()) +/* PMD_SIZE is really big for some archs. It doesn't make sense to + * reserve too much memory in one allocation. Hardcode BPF_PROG_PACK_SIZE to + * 2MiB * num_possible_nodes(). On most architectures PMD_SIZE will be + * greater than or equal to 2MB. + */ +#define BPF_PROG_PACK_SIZE (SZ_2M * num_possible_nodes()) #else #define BPF_PROG_PACK_SIZE PAGE_SIZE #endif diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 08a8e8102728..806a7c1b364b 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -222,7 +222,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, void **frames, int n, struct xdp_cpumap_stats *stats) { - struct xdp_rxq_info rxq; + struct xdp_rxq_info rxq = {}; struct xdp_buff xdp; int i, nframes = 0; @@ -306,6 +306,7 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, static int cpu_map_kthread_run(void *data) { struct bpf_cpu_map_entry *rcpu = data; + unsigned long last_qs = jiffies; complete(&rcpu->kthread_running); set_current_state(TASK_INTERRUPTIBLE); @@ -331,10 +332,12 @@ static int cpu_map_kthread_run(void *data) if (__ptr_ring_empty(rcpu->queue)) { schedule(); sched = 1; + last_qs = jiffies; } else { __set_current_state(TASK_RUNNING); } } else { + rcu_softirq_qs_periodic(last_qs); sched = cond_resched(); } diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index f9a87dcc5535..e051cbb07dac 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -131,13 +131,14 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) bpf_map_init_from_attr(&dtab->map, attr); if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + /* hash table size must be power of 2; roundup_pow_of_two() can + * overflow into UB on 32-bit arches, so check that first + */ + if (dtab->map.max_entries > 1UL << 31) + return -EINVAL; + dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); - if (!dtab->n_buckets) /* Overflow check */ - return -EINVAL; - } - - if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets, dtab->map.numa_node); if (!dtab->dev_index_head) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 88c71de0a0a9..0c74cc9012d5 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -495,7 +495,13 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) num_possible_cpus()); } - /* hash table size must be power of 2 */ + /* hash table size must be power of 2; roundup_pow_of_two() can overflow + * into UB on 32-bit arches, so check that first + */ + err = -E2BIG; + if (htab->map.max_entries > 1UL << 31) + goto free_htab; + htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); htab->elem_size = sizeof(struct htab_elem) + @@ -505,10 +511,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) else htab->elem_size += round_up(htab->map.value_size, 8); - err = -E2BIG; - /* prevent zero size kmalloc and check for u32 overflow */ - if (htab->n_buckets == 0 || - htab->n_buckets > U32_MAX / sizeof(struct bucket)) + /* check for u32 overflow */ + if (htab->n_buckets > U32_MAX / sizeof(struct bucket)) goto free_htab; err = -ENOMEM; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6212e4ae084b..758510b46d87 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -28,12 +28,13 @@ * * Different map implementations will rely on rcu in map methods * lookup/update/delete, therefore eBPF programs must run under rcu lock - * if program is allowed to access maps, so check rcu_read_lock_held in - * all three functions. + * if program is allowed to access maps, so check rcu_read_lock_held() or + * rcu_read_lock_trace_held() in all three functions. */ BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) { - WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && + !rcu_read_lock_bh_held()); return (unsigned long) map->ops->map_lookup_elem(map, key); } @@ -49,7 +50,8 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = { BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, void *, value, u64, flags) { - WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && + !rcu_read_lock_bh_held()); return map->ops->map_update_elem(map, key, value, flags); } @@ -66,7 +68,8 @@ const struct bpf_func_proto bpf_map_update_elem_proto = { BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) { - WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && + !rcu_read_lock_bh_held()); return map->ops->map_delete_elem(map, key); } @@ -325,7 +328,7 @@ static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock) __this_cpu_write(irqsave_flags, flags); } -notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) +NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) { __bpf_spin_lock_irqsave(lock); return 0; @@ -347,7 +350,7 @@ static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock) local_irq_restore(flags); } -notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) { __bpf_spin_unlock_irqrestore(lock); return 0; @@ -750,19 +753,20 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, /* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary * arguments representation. */ -#define MAX_BPRINTF_BUF_LEN 512 +#define MAX_BPRINTF_BIN_ARGS 512 /* Support executing three nested bprintf helper calls on a given CPU */ #define MAX_BPRINTF_NEST_LEVEL 3 struct bpf_bprintf_buffers { - char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN]; + char bin_args[MAX_BPRINTF_BIN_ARGS]; + char buf[MAX_BPRINTF_BUF]; }; -static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs); + +static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs); static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); -static int try_get_fmt_tmp_buf(char **tmp_buf) +static int try_get_buffers(struct bpf_bprintf_buffers **bufs) { - struct bpf_bprintf_buffers *bufs; int nest_level; preempt_disable(); @@ -772,18 +776,19 @@ static int try_get_fmt_tmp_buf(char **tmp_buf) preempt_enable(); return -EBUSY; } - bufs = this_cpu_ptr(&bpf_bprintf_bufs); - *tmp_buf = bufs->tmp_bufs[nest_level - 1]; + *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]); return 0; } -void bpf_bprintf_cleanup(void) +void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) { - if (this_cpu_read(bpf_bprintf_nest_level)) { - this_cpu_dec(bpf_bprintf_nest_level); - preempt_enable(); - } + if (!data->bin_args && !data->buf) + return; + if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0)) + return; + this_cpu_dec(bpf_bprintf_nest_level); + preempt_enable(); } /* @@ -792,18 +797,20 @@ void bpf_bprintf_cleanup(void) * Returns a negative value if fmt is an invalid format string or 0 otherwise. * * This can be used in two ways: - * - Format string verification only: when bin_args is NULL + * - Format string verification only: when data->get_bin_args is false * - Arguments preparation: in addition to the above verification, it writes in - * bin_args a binary representation of arguments usable by bstr_printf where - * pointers from BPF have been sanitized. + * data->bin_args a binary representation of arguments usable by bstr_printf + * where pointers from BPF have been sanitized. * * In argument preparation mode, if 0 is returned, safe temporary buffers are * allocated and bpf_bprintf_cleanup should be called to free them after use. */ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u32 **bin_args, u32 num_args) + u32 num_args, struct bpf_bprintf_data *data) { + bool get_buffers = (data->get_bin_args && num_args) || data->get_buf; char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; + struct bpf_bprintf_buffers *buffers = NULL; size_t sizeof_cur_arg, sizeof_cur_ip; int err, i, num_spec = 0; u64 cur_arg; @@ -814,14 +821,19 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, return -EINVAL; fmt_size = fmt_end - fmt; - if (bin_args) { - if (num_args && try_get_fmt_tmp_buf(&tmp_buf)) - return -EBUSY; + if (get_buffers && try_get_buffers(&buffers)) + return -EBUSY; - tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN; - *bin_args = (u32 *)tmp_buf; + if (data->get_bin_args) { + if (num_args) + tmp_buf = buffers->bin_args; + tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS; + data->bin_args = (u32 *)tmp_buf; } + if (data->get_buf) + data->buf = buffers->buf; + for (i = 0; i < fmt_size; i++) { if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { err = -EINVAL; @@ -1015,31 +1027,33 @@ nocopy_fmt: err = 0; out: if (err) - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(data); return err; } BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, - const void *, data, u32, data_len) + const void *, args, u32, data_len) { + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; int err, num_args; - u32 *bin_args; if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we * can safely give an unbounded size. */ - err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args); + err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data); if (err < 0) return err; - err = bstr_printf(str, str_size, fmt, bin_args); + err = bstr_printf(str, str_size, fmt, data.bin_args); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return err + 1; } @@ -1077,6 +1091,7 @@ struct bpf_hrtimer { struct bpf_prog *prog; void __rcu *callback_fn; void *value; + struct rcu_head rcu; }; /* the actual struct hidden inside uapi struct bpf_timer */ @@ -1298,6 +1313,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) if (in_nmi()) return -EOPNOTSUPP; + rcu_read_lock(); __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; if (!t) { @@ -1319,6 +1335,7 @@ out: * if it was running. */ ret = ret ?: hrtimer_cancel(&t->timer); + rcu_read_unlock(); return ret; } @@ -1373,7 +1390,7 @@ out: */ if (this_cpu_read(hrtimer_running) != t) hrtimer_cancel(&t->timer); - kfree(t); + kfree_rcu(t, rcu); } BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index f86db3cf7212..f0fd936cef31 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -94,11 +94,14 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) } else if (value_size / 8 > sysctl_perf_event_max_stack) return ERR_PTR(-EINVAL); - /* hash table size must be power of 2 */ - n_buckets = roundup_pow_of_two(attr->max_entries); - if (!n_buckets) + /* hash table size must be power of 2; roundup_pow_of_two() can overflow + * into UB on 32-bit arches, so check that first + */ + if (attr->max_entries > 1UL << 31) return ERR_PTR(-E2BIG); + n_buckets = roundup_pow_of_two(attr->max_entries); + cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); if (!smap) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8d0c38a8dcf2..1e46a84694b8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1598,6 +1598,9 @@ int generic_map_delete_batch(struct bpf_map *map, if (!max_count) return 0; + if (put_user(0, &uattr->batch.count)) + return -EFAULT; + key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); if (!key) return -ENOMEM; @@ -1657,6 +1660,9 @@ int generic_map_update_batch(struct bpf_map *map, if (!max_count) return 0; + if (put_user(0, &uattr->batch.count)) + return -EFAULT; + key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); if (!key) return -ENOMEM; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 23b6d57b5eef..1a29ac4db6ea 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7448,6 +7448,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, struct bpf_reg_state *fmt_reg = ®s[BPF_REG_3]; struct bpf_reg_state *data_len_reg = ®s[BPF_REG_5]; struct bpf_map *fmt_map = fmt_reg->map_ptr; + struct bpf_bprintf_data data = {}; int err, fmt_map_off, num_args; u64 fmt_addr; char *fmt; @@ -7472,7 +7473,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we * can focus on validating the format specifiers. */ - err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args); + err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data); if (err < 0) verbose(env, "Invalid format string\n"); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index ad6333c3fe1f..db89ac94e7db 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -654,8 +654,7 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(dev, mem->start) & boundary_mask; unsigned long max_slots = get_max_slots(boundary_mask); - unsigned int iotlb_align_mask = - dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); + unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); unsigned int nslots = nr_slots(alloc_size), stride; unsigned int index, wrap, count = 0, i; unsigned int offset = swiotlb_align_offset(dev, orig_addr); @@ -666,6 +665,14 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, BUG_ON(!nslots); BUG_ON(area_index >= mem->nareas); + /* + * Ensure that the allocation is at least slot-aligned and update + * 'iotlb_align_mask' to ignore bits that will be preserved when + * offsetting into the allocation. + */ + alloc_align_mask |= (IO_TLB_SIZE - 1); + iotlb_align_mask &= ~alloc_align_mask; + /* * For mappings with an alignment requirement don't bother looping to * unaligned slots once we found an aligned one. For allocations of diff --git a/kernel/entry/common.c b/kernel/entry/common.c index be61332c66b5..ccf2b1e1b40b 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -77,8 +77,14 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, /* Either of the above might have changed the syscall number */ syscall = syscall_get_nr(current, regs); - if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) + if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) { trace_sys_enter(regs, syscall); + /* + * Probes or BPF hooks in the tracepoint may have changed the + * system call number as well. + */ + syscall = syscall_get_nr(current, regs); + } syscall_enter_audit(regs, syscall); diff --git a/kernel/events/core.c b/kernel/events/core.c index 8c7d2f4f5fba..872d149b1959 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11223,9 +11223,30 @@ static DEVICE_ATTR_RW(perf_event_mux_interval_ms); static struct attribute *pmu_dev_attrs[] = { &dev_attr_type.attr, &dev_attr_perf_event_mux_interval_ms.attr, + &dev_attr_nr_addr_filters.attr, + NULL, +}; + +static umode_t pmu_dev_is_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct pmu *pmu = dev_get_drvdata(dev); + + if (n == 2 && !pmu->nr_addr_filters) + return 0; + + return a->mode; +} + +static struct attribute_group pmu_dev_attr_group = { + .is_visible = pmu_dev_is_visible, + .attrs = pmu_dev_attrs, +}; + +static const struct attribute_group *pmu_dev_groups[] = { + &pmu_dev_attr_group, NULL, }; -ATTRIBUTE_GROUPS(pmu_dev); static int pmu_bus_running; static struct bus_type pmu_bus = { @@ -11261,18 +11282,11 @@ static int pmu_dev_alloc(struct pmu *pmu) if (ret) goto free_dev; - /* For PMUs with address filters, throw in an extra attribute: */ - if (pmu->nr_addr_filters) - ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters); - - if (ret) - goto del_dev; - - if (pmu->attr_update) + if (pmu->attr_update) { ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update); - - if (ret) - goto del_dev; + if (ret) + goto del_dev; + } out: return ret; diff --git a/kernel/module/main.c b/kernel/module/main.c index 7a376e26de85..554aba47ab68 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2434,6 +2434,11 @@ static void do_free_init(struct work_struct *w) } } +void flush_module_init_free_work(void) +{ + flush_work(&init_free_wq); +} + #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "module." /* Default value for module->async_probe_requested */ @@ -2524,8 +2529,8 @@ static noinline int do_init_module(struct module *mod) * Note that module_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. Any * code such as mark_rodata_ro() which depends on those mappings to - * be cleaned up needs to sync with the queued work - ie - * rcu_barrier() + * be cleaned up needs to sync with the queued work by invoking + * flush_module_init_free_work(). */ if (llist_add(&freeinit->node, &init_free_list)) schedule_work(&init_free_wq); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 48e0f98aebb6..e17ec408adf4 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -196,6 +196,7 @@ static int __init mem_sleep_default_setup(char *str) if (mem_sleep_labels[state] && !strcmp(str, mem_sleep_labels[state])) { mem_sleep_default = state; + mem_sleep_current = state; break; } diff --git a/kernel/power/swap.c b/kernel/power/swap.c index cc44c37699de..5ab54ab5ace7 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -605,11 +605,11 @@ static int crc32_threadfn(void *data) unsigned i; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -618,7 +618,7 @@ static int crc32_threadfn(void *data) for (i = 0; i < d->run_threads; i++) *d->crc32 = crc32_le(*d->crc32, d->unc[i], *d->unc_len[i]); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -648,12 +648,12 @@ static int lzo_compress_threadfn(void *data) struct cmp_data *d = data; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; d->ret = -1; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -662,7 +662,7 @@ static int lzo_compress_threadfn(void *data) d->ret = lzo1x_1_compress(d->unc, d->unc_len, d->cmp + LZO_HEADER, &d->cmp_len, d->wrk); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -797,7 +797,7 @@ static int save_image_lzo(struct swap_map_handle *handle, data[thr].unc_len = off; - atomic_set(&data[thr].ready, 1); + atomic_set_release(&data[thr].ready, 1); wake_up(&data[thr].go); } @@ -805,12 +805,12 @@ static int save_image_lzo(struct swap_map_handle *handle, break; crc->run_threads = thr; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); for (run_threads = thr, thr = 0; thr < run_threads; thr++) { wait_event(data[thr].done, - atomic_read(&data[thr].stop)); + atomic_read_acquire(&data[thr].stop)); atomic_set(&data[thr].stop, 0); ret = data[thr].ret; @@ -849,7 +849,7 @@ static int save_image_lzo(struct swap_map_handle *handle, } } - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); } @@ -1131,12 +1131,12 @@ static int lzo_decompress_threadfn(void *data) struct dec_data *d = data; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; d->ret = -1; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -1149,7 +1149,7 @@ static int lzo_decompress_threadfn(void *data) flush_icache_range((unsigned long)d->unc, (unsigned long)d->unc + d->unc_len); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -1334,7 +1334,7 @@ static int load_image_lzo(struct swap_map_handle *handle, } if (crc->run_threads) { - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); crc->run_threads = 0; } @@ -1370,7 +1370,7 @@ static int load_image_lzo(struct swap_map_handle *handle, pg = 0; } - atomic_set(&data[thr].ready, 1); + atomic_set_release(&data[thr].ready, 1); wake_up(&data[thr].go); } @@ -1389,7 +1389,7 @@ static int load_image_lzo(struct swap_map_handle *handle, for (run_threads = thr, thr = 0; thr < run_threads; thr++) { wait_event(data[thr].done, - atomic_read(&data[thr].stop)); + atomic_read_acquire(&data[thr].stop)); atomic_set(&data[thr].stop, 0); ret = data[thr].ret; @@ -1420,7 +1420,7 @@ static int load_image_lzo(struct swap_map_handle *handle, ret = snapshot_write_next(snapshot); if (ret <= 0) { crc->run_threads = thr + 1; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); goto out_finish; } @@ -1428,13 +1428,13 @@ static int load_image_lzo(struct swap_map_handle *handle, } crc->run_threads = thr; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); } out_finish: if (crc->run_threads) { - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); } stop = ktime_get(); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index fb9aee89bd63..0355ba43585a 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1825,10 +1825,23 @@ static bool console_waiter; */ static void console_lock_spinning_enable(void) { + /* + * Do not use spinning in panic(). The panic CPU wants to keep the lock. + * Non-panic CPUs abandon the flush anyway. + * + * Just keep the lockdep annotation. The panic-CPU should avoid + * taking console_owner_lock because it might cause a deadlock. + * This looks like the easiest way how to prevent false lockdep + * reports without handling races a lockless way. + */ + if (panic_in_progress()) + goto lockdep; + raw_spin_lock(&console_owner_lock); console_owner = current; raw_spin_unlock(&console_owner_lock); +lockdep: /* The waiter may spin on us after setting console_owner */ spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); } @@ -1852,6 +1865,22 @@ static int console_lock_spinning_disable_and_check(void) { int waiter; + /* + * Ignore spinning waiters during panic() because they might get stopped + * or blocked at any time, + * + * It is safe because nobody is allowed to start spinning during panic + * in the first place. If there has been a waiter then non panic CPUs + * might stay spinning. They would get stopped anyway. The panic context + * will never start spinning and an interrupted spin on panic CPU will + * never continue. + */ + if (panic_in_progress()) { + /* Keep lockdep happy. */ + spin_release(&console_owner_dep_map, _THIS_IP_); + return 0; + } + raw_spin_lock(&console_owner_lock); waiter = READ_ONCE(console_waiter); console_owner = NULL; @@ -1946,6 +1975,12 @@ static int console_trylock_spinning(void) */ mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + /* + * Update @console_may_schedule for trylock because the previous + * owner may have been schedulable. + */ + console_may_schedule = 0; + return 1; } @@ -3044,6 +3079,21 @@ static int __init keep_bootcon_setup(char *str) early_param("keep_bootcon", keep_bootcon_setup); +static int console_call_setup(struct console *newcon, char *options) +{ + int err; + + if (!newcon->setup) + return 0; + + /* Synchronize with possible boot console. */ + console_lock(); + err = newcon->setup(newcon, options); + console_unlock(); + + return err; +} + /* * This is called by register_console() to try to match * the newly registered console with any of the ones selected @@ -3079,8 +3129,8 @@ static int try_enable_preferred_console(struct console *newcon, if (_braille_register_console(newcon, c)) return 0; - if (newcon->setup && - (err = newcon->setup(newcon, c->options)) != 0) + err = console_call_setup(newcon, c->options); + if (err) return err; } newcon->flags |= CON_ENABLED; @@ -3106,7 +3156,7 @@ static void try_enable_default_console(struct console *newcon) if (newcon->index < 0) newcon->index = 0; - if (newcon->setup && newcon->setup(newcon, NULL) != 0) + if (console_call_setup(newcon, NULL) != 0) return; newcon->flags |= CON_ENABLED; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 15df37bc052a..61f9503a5fe9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1051,6 +1051,38 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp) return needmore; } +static void swake_up_one_online_ipi(void *arg) +{ + struct swait_queue_head *wqh = arg; + + swake_up_one(wqh); +} + +static void swake_up_one_online(struct swait_queue_head *wqh) +{ + int cpu = get_cpu(); + + /* + * If called from rcutree_report_cpu_starting(), wake up + * is dangerous that late in the CPU-down hotplug process. The + * scheduler might queue an ignored hrtimer. Defer the wake up + * to an online CPU instead. + */ + if (unlikely(cpu_is_offline(cpu))) { + int target; + + target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU), + cpu_online_mask); + + smp_call_function_single(target, swake_up_one_online_ipi, + wqh, 0); + put_cpu(); + } else { + put_cpu(); + swake_up_one(wqh); + } +} + /* * Awaken the grace-period kthread. Don't do a self-awaken (unless in an * interrupt or softirq handler, in which case we just might immediately @@ -1075,7 +1107,7 @@ static void rcu_gp_kthread_wake(void) return; WRITE_ONCE(rcu_state.gp_wake_time, jiffies); WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq)); - swake_up_one(&rcu_state.gp_wq); + swake_up_one_online(&rcu_state.gp_wq); } /* @@ -4433,13 +4465,16 @@ static void __init rcu_start_exp_gp_kworkers(void) rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name); if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) { pr_err("Failed to create %s!\n", gp_kworker_name); + rcu_exp_gp_kworker = NULL; return; } rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name); if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) { pr_err("Failed to create %s!\n", par_gp_kworker_name); + rcu_exp_par_gp_kworker = NULL; kthread_destroy_worker(rcu_exp_gp_kworker); + rcu_exp_gp_kworker = NULL; return; } diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index aa3ec3c3b9f7..75e8d9652f7b 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -172,7 +172,6 @@ static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp) return ret; } - /* * Report the exit from RCU read-side critical section for the last task * that queued itself during or before the current expedited preemptible-RCU @@ -200,7 +199,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, raw_spin_unlock_irqrestore_rcu_node(rnp, flags); if (wake) { smp_mb(); /* EGP done before wake_up(). */ - swake_up_one(&rcu_state.expedited_wq); + swake_up_one_online(&rcu_state.expedited_wq); } break; } @@ -428,7 +427,12 @@ static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp) __sync_rcu_exp_select_node_cpus(rewp); } -static inline bool rcu_gp_par_worker_started(void) +static inline bool rcu_exp_worker_started(void) +{ + return !!READ_ONCE(rcu_exp_gp_kworker); +} + +static inline bool rcu_exp_par_worker_started(void) { return !!READ_ONCE(rcu_exp_par_gp_kworker); } @@ -478,7 +482,12 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) __sync_rcu_exp_select_node_cpus(rewp); } -static inline bool rcu_gp_par_worker_started(void) +static inline bool rcu_exp_worker_started(void) +{ + return !!READ_ONCE(rcu_gp_wq); +} + +static inline bool rcu_exp_par_worker_started(void) { return !!READ_ONCE(rcu_par_gp_wq); } @@ -541,7 +550,7 @@ static void sync_rcu_exp_select_cpus(void) rnp->exp_need_flush = false; if (!READ_ONCE(rnp->expmask)) continue; /* Avoid early boot non-existent wq. */ - if (!rcu_gp_par_worker_started() || + if (!rcu_exp_par_worker_started() || rcu_scheduler_active != RCU_SCHEDULER_RUNNING || rcu_is_last_leaf_node(rnp)) { /* No worker started yet or last leaf, do direct call. */ @@ -911,7 +920,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp) */ void synchronize_rcu_expedited(void) { - bool boottime = (rcu_scheduler_active == RCU_SCHEDULER_INIT); + bool use_worker; unsigned long flags; struct rcu_exp_work rew; struct rcu_node *rnp; @@ -922,6 +931,9 @@ void synchronize_rcu_expedited(void) lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_rcu_expedited() in RCU read-side critical section"); + use_worker = (rcu_scheduler_active != RCU_SCHEDULER_INIT) && + rcu_exp_worker_started(); + /* Is the state is such that the call is a grace period? */ if (rcu_blocking_is_gp()) { // Note well that this code runs with !PREEMPT && !SMP. @@ -951,7 +963,7 @@ void synchronize_rcu_expedited(void) return; /* Someone else did our work for us. */ /* Ensure that load happens before action based on it. */ - if (unlikely(boottime)) { + if (unlikely(!use_worker)) { /* Direct call during scheduler init and early_initcalls(). */ rcu_exp_sel_wait_wake(s); } else { @@ -969,7 +981,7 @@ void synchronize_rcu_expedited(void) /* Let the next expedited grace period start. */ mutex_unlock(&rcu_state.exp_mutex); - if (likely(!boottime)) + if (likely(use_worker)) synchronize_rcu_expedited_destroy_work(&rew); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9c14e4be5c0e..95430b0b9726 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6656,7 +6656,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu if (!available_idle_cpu(cpu)) { idle = false; if (*idle_cpu == -1) { - if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) { + if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, cpus)) { *idle_cpu = cpu; break; } @@ -6664,7 +6664,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu } break; } - if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr)) + if (*idle_cpu == -1 && cpumask_test_cpu(cpu, cpus)) *idle_cpu = cpu; } @@ -6678,13 +6678,19 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu /* * Scan the local SMT mask for idle CPUs. */ -static int select_idle_smt(struct task_struct *p, int target) +static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) { int cpu; for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) { if (cpu == target) continue; + /* + * Check if the CPU is in the LLC scheduling domain of @target. + * Due to isolcpus, there is no guarantee that all the siblings are in the domain. + */ + if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) + continue; if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) return cpu; } @@ -6708,7 +6714,7 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma return __select_idle_cpu(core, p); } -static inline int select_idle_smt(struct task_struct *p, int target) +static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) { return -1; } @@ -6970,7 +6976,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) has_idle_core = test_idle_cores(target); if (!has_idle_core && cpus_share_cache(prev, target)) { - i = select_idle_smt(p, prev); + i = select_idle_smt(p, sd, prev); if ((unsigned int)i < nr_cpumask_bits) return i; } diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 0c5be7ebb1dc..08b16d20c85b 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -161,6 +161,9 @@ | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \ | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK) +static DEFINE_MUTEX(membarrier_ipi_mutex); +#define SERIALIZE_IPI() guard(mutex)(&membarrier_ipi_mutex) + static void ipi_mb(void *info) { smp_mb(); /* IPIs should be serializing but paranoid. */ @@ -258,6 +261,7 @@ static int membarrier_global_expedited(void) if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) return -ENOMEM; + SERIALIZE_IPI(); cpus_read_lock(); rcu_read_lock(); for_each_online_cpu(cpu) { @@ -346,6 +350,7 @@ static int membarrier_private_expedited(int flags, int cpu_id) if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) return -ENOMEM; + SERIALIZE_IPI(); cpus_read_lock(); if (cpu_id >= 0) { @@ -459,6 +464,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm) * between threads which are users of @mm has its membarrier state * updated. */ + SERIALIZE_IPI(); cpus_read_lock(); rcu_read_lock(); for_each_online_cpu(cpu) { diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 76bafa8d331a..3a2335bc1d58 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -37,6 +37,8 @@ static struct ctl_table sched_rt_sysctls[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = sched_rt_handler, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "sched_rt_runtime_us", @@ -44,6 +46,8 @@ static struct ctl_table sched_rt_sysctls[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = sched_rt_handler, + .extra1 = SYSCTL_NEG_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "sched_rr_timeslice_ms", @@ -2970,9 +2974,6 @@ static int sched_rt_global_constraints(void) #ifdef CONFIG_SYSCTL static int sched_rt_global_validate(void) { - if (sysctl_sched_rt_period <= 0) - return -EINVAL; - if ((sysctl_sched_rt_runtime != RUNTIME_INF) && ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) || ((u64)sysctl_sched_rt_runtime * @@ -3003,7 +3004,7 @@ static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, old_period = sysctl_sched_rt_period; old_runtime = sysctl_sched_rt_runtime; - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) { ret = sched_rt_global_validate(); @@ -3047,6 +3048,9 @@ static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, sched_rr_timeslice = sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE : msecs_to_jiffies(sysctl_sched_rr_timeslice); + + if (sysctl_sched_rr_timeslice <= 0) + sysctl_sched_rr_timeslice = jiffies_to_msecs(RR_TIMESLICE); } mutex_unlock(&mutex); diff --git a/kernel/sys.c b/kernel/sys.c index c85e1abf7b7c..d06eda1387b6 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1778,74 +1778,87 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) struct task_struct *t; unsigned long flags; u64 tgutime, tgstime, utime, stime; - unsigned long maxrss = 0; + unsigned long maxrss; + struct mm_struct *mm; + struct signal_struct *sig = p->signal; + unsigned int seq = 0; - memset((char *)r, 0, sizeof (*r)); +retry: + memset(r, 0, sizeof(*r)); utime = stime = 0; + maxrss = 0; if (who == RUSAGE_THREAD) { task_cputime_adjusted(current, &utime, &stime); accumulate_thread_rusage(p, r); - maxrss = p->signal->maxrss; - goto out; + maxrss = sig->maxrss; + goto out_thread; } - if (!lock_task_sighand(p, &flags)) - return; + flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); switch (who) { case RUSAGE_BOTH: case RUSAGE_CHILDREN: - utime = p->signal->cutime; - stime = p->signal->cstime; - r->ru_nvcsw = p->signal->cnvcsw; - r->ru_nivcsw = p->signal->cnivcsw; - r->ru_minflt = p->signal->cmin_flt; - r->ru_majflt = p->signal->cmaj_flt; - r->ru_inblock = p->signal->cinblock; - r->ru_oublock = p->signal->coublock; - maxrss = p->signal->cmaxrss; + utime = sig->cutime; + stime = sig->cstime; + r->ru_nvcsw = sig->cnvcsw; + r->ru_nivcsw = sig->cnivcsw; + r->ru_minflt = sig->cmin_flt; + r->ru_majflt = sig->cmaj_flt; + r->ru_inblock = sig->cinblock; + r->ru_oublock = sig->coublock; + maxrss = sig->cmaxrss; if (who == RUSAGE_CHILDREN) break; fallthrough; case RUSAGE_SELF: - thread_group_cputime_adjusted(p, &tgutime, &tgstime); - utime += tgutime; - stime += tgstime; - r->ru_nvcsw += p->signal->nvcsw; - r->ru_nivcsw += p->signal->nivcsw; - r->ru_minflt += p->signal->min_flt; - r->ru_majflt += p->signal->maj_flt; - r->ru_inblock += p->signal->inblock; - r->ru_oublock += p->signal->oublock; - if (maxrss < p->signal->maxrss) - maxrss = p->signal->maxrss; - t = p; - do { + r->ru_nvcsw += sig->nvcsw; + r->ru_nivcsw += sig->nivcsw; + r->ru_minflt += sig->min_flt; + r->ru_majflt += sig->maj_flt; + r->ru_inblock += sig->inblock; + r->ru_oublock += sig->oublock; + if (maxrss < sig->maxrss) + maxrss = sig->maxrss; + + rcu_read_lock(); + __for_each_thread(sig, t) accumulate_thread_rusage(t, r); - } while_each_thread(p, t); + rcu_read_unlock(); + break; default: BUG(); } - unlock_task_sighand(p, &flags); -out: + if (need_seqretry(&sig->stats_lock, seq)) { + seq = 1; + goto retry; + } + done_seqretry_irqrestore(&sig->stats_lock, seq, flags); + + if (who == RUSAGE_CHILDREN) + goto out_children; + + thread_group_cputime_adjusted(p, &tgutime, &tgstime); + utime += tgutime; + stime += tgstime; + +out_thread: + mm = get_task_mm(p); + if (mm) { + setmax_mm_hiwater_rss(&maxrss, mm); + mmput(mm); + } + +out_children: + r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ r->ru_utime = ns_to_kernel_old_timeval(utime); r->ru_stime = ns_to_kernel_old_timeval(stime); - - if (who != RUSAGE_CHILDREN) { - struct mm_struct *mm = get_task_mm(p); - - if (mm) { - setmax_mm_hiwater_rss(&maxrss, mm); - mmput(mm); - } - } - r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ } SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1c90e710d537..cc6db3bce1b2 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -126,6 +126,7 @@ static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); static int watchdog_running; static atomic_t watchdog_reset_pending; +static int64_t watchdog_max_interval; static inline void clocksource_watchdog_lock(unsigned long *flags) { @@ -144,6 +145,7 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating); * Interval: 0.5sec. */ #define WATCHDOG_INTERVAL (HZ >> 1) +#define WATCHDOG_INTERVAL_MAX_NS ((2 * WATCHDOG_INTERVAL) * (NSEC_PER_SEC / HZ)) static void clocksource_watchdog_work(struct work_struct *work) { @@ -396,8 +398,8 @@ static inline void clocksource_reset_watchdog(void) static void clocksource_watchdog(struct timer_list *unused) { u64 csnow, wdnow, cslast, wdlast, delta; + int64_t wd_nsec, cs_nsec, interval; int next_cpu, reset_pending; - int64_t wd_nsec, cs_nsec; struct clocksource *cs; enum wd_read_status read_ret; unsigned long extra_wait = 0; @@ -467,6 +469,27 @@ static void clocksource_watchdog(struct timer_list *unused) if (atomic_read(&watchdog_reset_pending)) continue; + /* + * The processing of timer softirqs can get delayed (usually + * on account of ksoftirqd not getting to run in a timely + * manner), which causes the watchdog interval to stretch. + * Skew detection may fail for longer watchdog intervals + * on account of fixed margins being used. + * Some clocksources, e.g. acpi_pm, cannot tolerate + * watchdog intervals longer than a few seconds. + */ + interval = max(cs_nsec, wd_nsec); + if (unlikely(interval > WATCHDOG_INTERVAL_MAX_NS)) { + if (system_state > SYSTEM_SCHEDULING && + interval > 2 * watchdog_max_interval) { + watchdog_max_interval = interval; + pr_warn("Long readout interval, skipping watchdog check: cs_nsec: %lld wd_nsec: %lld\n", + cs_nsec, wd_nsec); + } + watchdog_timer.expires = jiffies; + continue; + } + /* Check the deviation from the watchdog clocksource. */ md = cs->uncertainty_margin + watchdog->uncertainty_margin; if (abs(cs_nsec - wd_nsec) > md) { diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 5561dabc9b22..9bb88836c42e 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1082,6 +1082,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, enum hrtimer_mode mode) { debug_activate(timer, mode); + WARN_ON_ONCE(!base->cpu_base->online); base->cpu_base->active_bases |= 1 << base->index; @@ -2180,6 +2181,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->softirq_next_timer = NULL; cpu_base->expires_next = KTIME_MAX; cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->online = 1; hrtimer_cpu_base_init_expiry_lock(cpu_base); return 0; } @@ -2247,6 +2249,7 @@ int hrtimers_cpu_dying(unsigned int dying_cpu) smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); raw_spin_unlock(&new_base->lock); + old_base->online = 0; raw_spin_unlock(&old_base->lock); return 0; @@ -2263,7 +2266,7 @@ void __init hrtimers_init(void) /** * schedule_hrtimeout_range_clock - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) + * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks * @mode: timer mode * @clock_id: timer clock to be used */ @@ -2290,6 +2293,13 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, return -EINTR; } + /* + * Override any slack passed by the user if under + * rt contraints. + */ + if (rt_task(current)) + delta = 0; + hrtimer_init_sleeper_on_stack(&t, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_sleeper_start_expires(&t, mode); @@ -2309,7 +2319,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); /** * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) + * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks * @mode: timer mode * * Make the current task sleep until the given expiry time has @@ -2317,7 +2327,8 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); * the current task state has been set (see set_current_state()). * * The @delta argument gives the kernel the freedom to schedule the - * actual wakeup to a time that is both power and performance friendly. + * actual wakeup to a time that is both power and performance friendly + * for regular (non RT/DL) tasks. * The kernel give the normal best effort behavior for "@expires+@delta", * but may decide to fire the timer earlier, but no earlier than @expires. * diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 1a5d02224d46..8cfdc6b978d7 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1557,6 +1557,7 @@ void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); ktime_t idle_sleeptime, iowait_sleeptime; + unsigned long idle_calls, idle_sleeps; # ifdef CONFIG_HIGH_RES_TIMERS if (ts->sched_timer.base) @@ -1565,9 +1566,13 @@ void tick_cancel_sched_timer(int cpu) idle_sleeptime = ts->idle_sleeptime; iowait_sleeptime = ts->iowait_sleeptime; + idle_calls = ts->idle_calls; + idle_sleeps = ts->idle_sleeps; memset(ts, 0, sizeof(*ts)); ts->idle_sleeptime = idle_sleeptime; ts->iowait_sleeptime = iowait_sleeptime; + ts->idle_calls = idle_calls; + ts->idle_sleeps = idle_sleeps; } #endif diff --git a/kernel/time/time_test.c b/kernel/time/time_test.c index 831e8e779ace..f7c3de01197c 100644 --- a/kernel/time/time_test.c +++ b/kernel/time/time_test.c @@ -73,7 +73,7 @@ static void time64_to_tm_test_date_range(struct kunit *test) days = div_s64(secs, 86400); - #define FAIL_MSG "%05ld/%02d/%02d (%2d) : %ld", \ + #define FAIL_MSG "%05ld/%02d/%02d (%2d) : %lld", \ year, month, mdday, yday, days KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 221c8c404973..b158cbef4d8d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1180,13 +1180,15 @@ static int adjust_historical_crosststamp(struct system_time_snapshot *history, } /* - * cycle_between - true if test occurs chronologically between before and after + * timestamp_in_interval - true if ts is chronologically in [start, end] + * + * True if ts occurs chronologically at or after start, and before or at end. */ -static bool cycle_between(u64 before, u64 test, u64 after) +static bool timestamp_in_interval(u64 start, u64 end, u64 ts) { - if (test > before && test < after) + if (ts >= start && ts <= end) return true; - if (test < before && before > after) + if (start > end && (ts >= start || ts <= end)) return true; return false; } @@ -1246,7 +1248,7 @@ int get_device_system_crosststamp(int (*get_time_fn) */ now = tk_clock_read(&tk->tkr_mono); interval_start = tk->tkr_mono.cycle_last; - if (!cycle_between(interval_start, cycles, now)) { + if (!timestamp_in_interval(interval_start, now, cycles)) { clock_was_set_seq = tk->clock_was_set_seq; cs_was_changed_seq = tk->cs_was_changed_seq; cycles = interval_start; @@ -1259,10 +1261,8 @@ int get_device_system_crosststamp(int (*get_time_fn) tk_core.timekeeper.offs_real); base_raw = tk->tkr_raw.base; - nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, - system_counterval.cycles); - nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, - system_counterval.cycles); + nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles); + nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles); } while (read_seqcount_retry(&tk_core.seq, seq)); xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real); @@ -1277,13 +1277,13 @@ int get_device_system_crosststamp(int (*get_time_fn) bool discontinuity; /* - * Check that the counter value occurs after the provided + * Check that the counter value is not before the provided * history reference and that the history doesn't cross a * clocksource change */ if (!history_begin || - !cycle_between(history_begin->cycles, - system_counterval.cycles, cycles) || + !timestamp_in_interval(history_begin->cycles, + cycles, system_counterval.cycles) || history_begin->cs_was_changed_seq != cs_was_changed_seq) return -EINVAL; partial_history_cycles = cycles - system_counterval.cycles; diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 717fcb9fb14a..59469897432b 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1083,7 +1083,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option /* * We are trying to schedule the timer on the new base. * However we can't change timer's base while it is running, - * otherwise del_timer_sync() can't detect that the timer's + * otherwise timer_delete_sync() can't detect that the timer's * handler yet has not finished. This also guarantees that the * timer is serialized wrt itself. */ @@ -1121,14 +1121,16 @@ out_unlock: } /** - * mod_timer_pending - modify a pending timer's timeout - * @timer: the pending timer to be modified - * @expires: new timeout in jiffies + * mod_timer_pending - Modify a pending timer's timeout + * @timer: The pending timer to be modified + * @expires: New absolute timeout in jiffies * - * mod_timer_pending() is the same for pending timers as mod_timer(), - * but will not re-activate and modify already deleted timers. + * mod_timer_pending() is the same for pending timers as mod_timer(), but + * will not activate inactive timers. * - * It is useful for unserialized use of timers. + * Return: + * * %0 - The timer was inactive and not modified + * * %1 - The timer was active and requeued to expire at @expires */ int mod_timer_pending(struct timer_list *timer, unsigned long expires) { @@ -1137,24 +1139,27 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires) EXPORT_SYMBOL(mod_timer_pending); /** - * mod_timer - modify a timer's timeout - * @timer: the timer to be modified - * @expires: new timeout in jiffies - * - * mod_timer() is a more efficient way to update the expire field of an - * active timer (if the timer is inactive it will be activated) + * mod_timer - Modify a timer's timeout + * @timer: The timer to be modified + * @expires: New absolute timeout in jiffies * * mod_timer(timer, expires) is equivalent to: * * del_timer(timer); timer->expires = expires; add_timer(timer); * + * mod_timer() is more efficient than the above open coded sequence. In + * case that the timer is inactive, the del_timer() part is a NOP. The + * timer is in any case activated with the new expiry time @expires. + * * Note that if there are multiple unserialized concurrent users of the * same timer, then mod_timer() is the only safe way to modify the timeout, * since add_timer() cannot modify an already running timer. * - * The function returns whether it has modified a pending timer or not. - * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an - * active timer returns 1.) + * Return: + * * %0 - The timer was inactive and started + * * %1 - The timer was active and requeued to expire at @expires or + * the timer was active and not modified because @expires did + * not change the effective expiry time */ int mod_timer(struct timer_list *timer, unsigned long expires) { @@ -1165,11 +1170,18 @@ EXPORT_SYMBOL(mod_timer); /** * timer_reduce - Modify a timer's timeout if it would reduce the timeout * @timer: The timer to be modified - * @expires: New timeout in jiffies + * @expires: New absolute timeout in jiffies * * timer_reduce() is very similar to mod_timer(), except that it will only - * modify a running timer if that would reduce the expiration time (it will - * start a timer that isn't running). + * modify an enqueued timer if that would reduce the expiration time. If + * @timer is not enqueued it starts the timer. + * + * Return: + * * %0 - The timer was inactive and started + * * %1 - The timer was active and requeued to expire at @expires or + * the timer was active and not modified because @expires + * did not change the effective expiry time such that the + * timer would expire earlier than already scheduled */ int timer_reduce(struct timer_list *timer, unsigned long expires) { @@ -1178,18 +1190,21 @@ int timer_reduce(struct timer_list *timer, unsigned long expires) EXPORT_SYMBOL(timer_reduce); /** - * add_timer - start a timer - * @timer: the timer to be added + * add_timer - Start a timer + * @timer: The timer to be started * - * The kernel will do a ->function(@timer) callback from the - * timer interrupt at the ->expires point in the future. The - * current time is 'jiffies'. + * Start @timer to expire at @timer->expires in the future. @timer->expires + * is the absolute expiry time measured in 'jiffies'. When the timer expires + * timer->function(timer) will be invoked from soft interrupt context. * - * The timer's ->expires, ->function fields must be set prior calling this - * function. + * The @timer->expires and @timer->function fields must be set prior + * to calling this function. * - * Timers with an ->expires field in the past will be executed in the next - * timer tick. + * If @timer->expires is already in the past @timer will be queued to + * expire at the next timer tick. + * + * This can only operate on an inactive timer. Attempts to invoke this on + * an active timer are rejected with a warning. */ void add_timer(struct timer_list *timer) { @@ -1199,11 +1214,13 @@ void add_timer(struct timer_list *timer) EXPORT_SYMBOL(add_timer); /** - * add_timer_on - start a timer on a particular CPU - * @timer: the timer to be added - * @cpu: the CPU to start it on + * add_timer_on - Start a timer on a particular CPU + * @timer: The timer to be started + * @cpu: The CPU to start it on * - * This is not very scalable on SMP. Double adds are not possible. + * Same as add_timer() except that it starts the timer on the given CPU. + * + * See add_timer() for further details. */ void add_timer_on(struct timer_list *timer, int cpu) { @@ -1238,15 +1255,18 @@ void add_timer_on(struct timer_list *timer, int cpu) EXPORT_SYMBOL_GPL(add_timer_on); /** - * del_timer - deactivate a timer. - * @timer: the timer to be deactivated + * del_timer - Deactivate a timer. + * @timer: The timer to be deactivated * - * del_timer() deactivates a timer - this works on both active and inactive - * timers. + * The function only deactivates a pending timer, but contrary to + * timer_delete_sync() it does not take into account whether the timer's + * callback function is concurrently executed on a different CPU or not. + * It neither prevents rearming of the timer. If @timer can be rearmed + * concurrently then the return value of this function is meaningless. * - * The function returns whether it has deactivated a pending timer or not. - * (ie. del_timer() of an inactive timer returns 0, del_timer() of an - * active timer returns 1.) + * Return: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ int del_timer(struct timer_list *timer) { @@ -1268,10 +1288,19 @@ EXPORT_SYMBOL(del_timer); /** * try_to_del_timer_sync - Try to deactivate a timer - * @timer: timer to delete + * @timer: Timer to deactivate * - * This function tries to deactivate a timer. Upon successful (ret >= 0) - * exit the timer is not queued and the handler is not running on any CPU. + * This function tries to deactivate a timer. On success the timer is not + * queued and the timer callback function is not running on any CPU. + * + * This function does not guarantee that the timer cannot be rearmed right + * after dropping the base lock. That needs to be prevented by the calling + * code if necessary. + * + * Return: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated + * * %-1 - The timer callback function is running on a different CPU */ int try_to_del_timer_sync(struct timer_list *timer) { @@ -1365,25 +1394,20 @@ static inline void timer_sync_wait_running(struct timer_base *base) { } static inline void del_timer_wait_running(struct timer_list *timer) { } #endif -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) /** - * del_timer_sync - deactivate a timer and wait for the handler to finish. - * @timer: the timer to be deactivated - * - * This function only differs from del_timer() on SMP: besides deactivating - * the timer it also makes sure the handler has finished executing on other - * CPUs. + * timer_delete_sync - Deactivate a timer and wait for the handler to finish. + * @timer: The timer to be deactivated * * Synchronization rules: Callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from * interrupt contexts unless the timer is an irqsafe one. The caller must - * not hold locks which would prevent completion of the timer's - * handler. The timer's handler must not call add_timer_on(). Upon exit the - * timer is not queued and the handler is not running on any CPU. + * not hold locks which would prevent completion of the timer's callback + * function. The timer's handler must not call add_timer_on(). Upon exit + * the timer is not queued and the handler is not running on any CPU. * - * Note: For !irqsafe timers, you must not hold locks that are held in - * interrupt context while calling this function. Even if the lock has - * nothing to do with the timer in question. Here's why:: + * For !irqsafe timers, the caller must not hold locks that are held in + * interrupt context. Even if the lock has nothing to do with the timer in + * question. Here's why:: * * CPU0 CPU1 * ---- ---- @@ -1393,16 +1417,23 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } * spin_lock_irq(somelock); * * spin_lock(somelock); - * del_timer_sync(mytimer); + * timer_delete_sync(mytimer); * while (base->running_timer == mytimer); * - * Now del_timer_sync() will never return and never release somelock. - * The interrupt on the other CPU is waiting to grab somelock but - * it has interrupted the softirq that CPU0 is waiting to finish. + * Now timer_delete_sync() will never return and never release somelock. + * The interrupt on the other CPU is waiting to grab somelock but it has + * interrupted the softirq that CPU0 is waiting to finish. * - * The function returns whether it has deactivated a pending timer or not. + * This function cannot guarantee that the timer is not rearmed again by + * some concurrent or preempting code, right after it dropped the base + * lock. If there is the possibility of a concurrent rearm then the return + * value of the function is meaningless. + * + * Return: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ -int del_timer_sync(struct timer_list *timer) +int timer_delete_sync(struct timer_list *timer) { int ret; @@ -1442,8 +1473,7 @@ int del_timer_sync(struct timer_list *timer) return ret; } -EXPORT_SYMBOL(del_timer_sync); -#endif +EXPORT_SYMBOL(timer_delete_sync); static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *), @@ -1465,8 +1495,8 @@ static void call_timer_fn(struct timer_list *timer, #endif /* * Couple the lock chain with the lock chain at - * del_timer_sync() by acquiring the lock_map around the fn() - * call here and in del_timer_sync(). + * timer_delete_sync() by acquiring the lock_map around the fn() + * call here and in timer_delete_sync(). */ lock_map_acquire(&lockdep_map); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f4a494a457c5..3fdde232eaa9 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -368,8 +368,6 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) return &bpf_probe_write_user_proto; } -static DEFINE_RAW_SPINLOCK(trace_printk_lock); - #define MAX_TRACE_PRINTK_VARARGS 3 #define BPF_TRACE_PRINTK_SIZE 1024 @@ -377,23 +375,22 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, u64, arg2, u64, arg3) { u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 }; - u32 *bin_args; - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; + struct bpf_bprintf_data data = { + .get_bin_args = true, + .get_buf = true, + }; int ret; - ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args, - MAX_TRACE_PRINTK_VARARGS); + ret = bpf_bprintf_prepare(fmt, fmt_size, args, + MAX_TRACE_PRINTK_VARARGS, &data); if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); + ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); - trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + trace_bpf_trace_printk(data.buf); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return ret; } @@ -426,30 +423,29 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } -BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data, +BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, u32, data_len) { - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; + struct bpf_bprintf_data data = { + .get_bin_args = true, + .get_buf = true, + }; int ret, num_args; - u32 *bin_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; - ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); + ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); - trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + trace_bpf_trace_printk(data.buf); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return ret; } @@ -471,23 +467,25 @@ const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) } BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, - const void *, data, u32, data_len) + const void *, args, u32, data_len) { + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; int err, num_args; - u32 *bin_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; - err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); if (err < 0) return err; - seq_bprintf(m, fmt, bin_args); + seq_bprintf(m, fmt, data.bin_args); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return seq_has_overflowed(m) ? -EOVERFLOW : 0; } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1285e7fb597e..431a922e5c89 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -414,7 +414,6 @@ struct rb_irq_work { struct irq_work work; wait_queue_head_t waiters; wait_queue_head_t full_waiters; - long wait_index; bool waiters_pending; bool full_waiters_pending; bool wakeup_full; @@ -908,8 +907,19 @@ static void rb_wake_up_waiters(struct irq_work *work) wake_up_all(&rbwork->waiters); if (rbwork->full_waiters_pending || rbwork->wakeup_full) { + /* Only cpu_buffer sets the above flags */ + struct ring_buffer_per_cpu *cpu_buffer = + container_of(rbwork, struct ring_buffer_per_cpu, irq_work); + + /* Called from interrupt context */ + raw_spin_lock(&cpu_buffer->reader_lock); rbwork->wakeup_full = false; rbwork->full_waiters_pending = false; + + /* Waking up all waiters, they will reset the shortest full */ + cpu_buffer->shortest_full = 0; + raw_spin_unlock(&cpu_buffer->reader_lock); + wake_up_all(&rbwork->full_waiters); } } @@ -949,14 +959,95 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) rbwork = &cpu_buffer->irq_work; } - rbwork->wait_index++; - /* make sure the waiters see the new index */ - smp_wmb(); - /* This can be called in any context */ irq_work_queue(&rbwork->work); } +static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full) +{ + struct ring_buffer_per_cpu *cpu_buffer; + bool ret = false; + + /* Reads of all CPUs always waits for any data */ + if (cpu == RING_BUFFER_ALL_CPUS) + return !ring_buffer_empty(buffer); + + cpu_buffer = buffer->buffers[cpu]; + + if (!ring_buffer_empty_cpu(buffer, cpu)) { + unsigned long flags; + bool pagebusy; + + if (!full) + return true; + + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; + ret = !pagebusy && full_hit(buffer, cpu, full); + + if (!ret && (!cpu_buffer->shortest_full || + cpu_buffer->shortest_full > full)) { + cpu_buffer->shortest_full = full; + } + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + } + return ret; +} + +static inline bool +rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer, + int cpu, int full, ring_buffer_cond_fn cond, void *data) +{ + if (rb_watermark_hit(buffer, cpu, full)) + return true; + + if (cond(data)) + return true; + + /* + * The events can happen in critical sections where + * checking a work queue can cause deadlocks. + * After adding a task to the queue, this flag is set + * only to notify events to try to wake up the queue + * using irq_work. + * + * We don't clear it even if the buffer is no longer + * empty. The flag only causes the next event to run + * irq_work to do the work queue wake up. The worse + * that can happen if we race with !trace_empty() is that + * an event will cause an irq_work to try to wake up + * an empty queue. + * + * There's no reason to protect this flag either, as + * the work queue and irq_work logic will do the necessary + * synchronization for the wake ups. The only thing + * that is necessary is that the wake up happens after + * a task has been queued. It's OK for spurious wake ups. + */ + if (full) + rbwork->full_waiters_pending = true; + else + rbwork->waiters_pending = true; + + return false; +} + +/* + * The default wait condition for ring_buffer_wait() is to just to exit the + * wait loop the first time it is woken up. + */ +static bool rb_wait_once(void *data) +{ + long *once = data; + + /* wait_event() actually calls this twice before scheduling*/ + if (*once > 1) + return true; + + (*once)++; + return false; +} + /** * ring_buffer_wait - wait for input to the ring buffer * @buffer: buffer to wait on @@ -970,101 +1061,39 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) { struct ring_buffer_per_cpu *cpu_buffer; - DEFINE_WAIT(wait); - struct rb_irq_work *work; - long wait_index; + struct wait_queue_head *waitq; + ring_buffer_cond_fn cond; + struct rb_irq_work *rbwork; + void *data; + long once = 0; int ret = 0; + cond = rb_wait_once; + data = &once; + /* * Depending on what the caller is waiting for, either any * data in any cpu buffer, or a specific buffer, put the * caller on the appropriate wait queue. */ if (cpu == RING_BUFFER_ALL_CPUS) { - work = &buffer->irq_work; + rbwork = &buffer->irq_work; /* Full only makes sense on per cpu reads */ full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return -ENODEV; cpu_buffer = buffer->buffers[cpu]; - work = &cpu_buffer->irq_work; - } - - wait_index = READ_ONCE(work->wait_index); - - while (true) { - if (full) - prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE); - else - prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); - - /* - * The events can happen in critical sections where - * checking a work queue can cause deadlocks. - * After adding a task to the queue, this flag is set - * only to notify events to try to wake up the queue - * using irq_work. - * - * We don't clear it even if the buffer is no longer - * empty. The flag only causes the next event to run - * irq_work to do the work queue wake up. The worse - * that can happen if we race with !trace_empty() is that - * an event will cause an irq_work to try to wake up - * an empty queue. - * - * There's no reason to protect this flag either, as - * the work queue and irq_work logic will do the necessary - * synchronization for the wake ups. The only thing - * that is necessary is that the wake up happens after - * a task has been queued. It's OK for spurious wake ups. - */ - if (full) - work->full_waiters_pending = true; - else - work->waiters_pending = true; - - if (signal_pending(current)) { - ret = -EINTR; - break; - } - - if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) - break; - - if (cpu != RING_BUFFER_ALL_CPUS && - !ring_buffer_empty_cpu(buffer, cpu)) { - unsigned long flags; - bool pagebusy; - bool done; - - if (!full) - break; - - raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; - done = !pagebusy && full_hit(buffer, cpu, full); - - if (!cpu_buffer->shortest_full || - cpu_buffer->shortest_full > full) - cpu_buffer->shortest_full = full; - raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (done) - break; - } - - schedule(); - - /* Make sure to see the new wait index */ - smp_rmb(); - if (wait_index != work->wait_index) - break; + rbwork = &cpu_buffer->irq_work; } if (full) - finish_wait(&work->full_waiters, &wait); + waitq = &rbwork->full_waiters; else - finish_wait(&work->waiters, &wait); + waitq = &rbwork->waiters; + + ret = wait_event_interruptible((*waitq), + rb_wait_cond(rbwork, buffer, cpu, full, cond, data)); return ret; } @@ -1088,30 +1117,51 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full) { struct ring_buffer_per_cpu *cpu_buffer; - struct rb_irq_work *work; + struct rb_irq_work *rbwork; if (cpu == RING_BUFFER_ALL_CPUS) { - work = &buffer->irq_work; + rbwork = &buffer->irq_work; full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) - return -EINVAL; + return EPOLLERR; cpu_buffer = buffer->buffers[cpu]; - work = &cpu_buffer->irq_work; + rbwork = &cpu_buffer->irq_work; } if (full) { - poll_wait(filp, &work->full_waiters, poll_table); - work->full_waiters_pending = true; + unsigned long flags; + + poll_wait(filp, &rbwork->full_waiters, poll_table); + + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); if (!cpu_buffer->shortest_full || cpu_buffer->shortest_full > full) cpu_buffer->shortest_full = full; - } else { - poll_wait(filp, &work->waiters, poll_table); - work->waiters_pending = true; + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + if (full_hit(buffer, cpu, full)) + return EPOLLIN | EPOLLRDNORM; + /* + * Only allow full_waiters_pending update to be seen after + * the shortest_full is set. If the writer sees the + * full_waiters_pending flag set, it will compare the + * amount in the ring buffer to shortest_full. If the amount + * in the ring buffer is greater than the shortest_full + * percent, it will call the irq_work handler to wake up + * this list. The irq_handler will reset shortest_full + * back to zero. That's done under the reader_lock, but + * the below smp_mb() makes sure that the update to + * full_waiters_pending doesn't leak up into the above. + */ + smp_mb(); + rbwork->full_waiters_pending = true; + return 0; } + poll_wait(filp, &rbwork->waiters, poll_table); + rbwork->waiters_pending = true; + /* * There's a tight race between setting the waiters_pending and * checking if the ring buffer is empty. Once the waiters_pending bit @@ -1127,9 +1177,6 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, */ smp_mb(); - if (full) - return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0; - if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) return EPOLLIN | EPOLLRDNORM; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2b3c4cd8382b..f2b00ea38111 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -2249,7 +2250,7 @@ struct saved_cmdlines_buffer { unsigned *map_cmdline_to_pid; unsigned cmdline_num; int cmdline_idx; - char *saved_cmdlines; + char saved_cmdlines[]; }; static struct saved_cmdlines_buffer *savedcmd; @@ -2263,47 +2264,60 @@ static inline void set_cmdline(int idx, const char *cmdline) strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN); } -static int allocate_cmdlines_buffer(unsigned int val, - struct saved_cmdlines_buffer *s) +static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) { + int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN); + + kfree(s->map_cmdline_to_pid); + kmemleak_free(s); + free_pages((unsigned long)s, order); +} + +static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val) +{ + struct saved_cmdlines_buffer *s; + struct page *page; + int orig_size, size; + int order; + + /* Figure out how much is needed to hold the given number of cmdlines */ + orig_size = sizeof(*s) + val * TASK_COMM_LEN; + order = get_order(orig_size); + size = 1 << (order + PAGE_SHIFT); + page = alloc_pages(GFP_KERNEL, order); + if (!page) + return NULL; + + s = page_address(page); + kmemleak_alloc(s, size, 1, GFP_KERNEL); + memset(s, 0, sizeof(*s)); + + /* Round up to actual allocation */ + val = (size - sizeof(*s)) / TASK_COMM_LEN; + s->cmdline_num = val; + s->map_cmdline_to_pid = kmalloc_array(val, sizeof(*s->map_cmdline_to_pid), GFP_KERNEL); - if (!s->map_cmdline_to_pid) - return -ENOMEM; - - s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); - if (!s->saved_cmdlines) { - kfree(s->map_cmdline_to_pid); - return -ENOMEM; + if (!s->map_cmdline_to_pid) { + free_saved_cmdlines_buffer(s); + return NULL; } s->cmdline_idx = 0; - s->cmdline_num = val; memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(s->map_pid_to_cmdline)); memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP, val * sizeof(*s->map_cmdline_to_pid)); - return 0; + return s; } static int trace_create_savedcmd(void) { - int ret; + savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT); - savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL); - if (!savedcmd) - return -ENOMEM; - - ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd); - if (ret < 0) { - kfree(savedcmd); - savedcmd = NULL; - return -ENOMEM; - } - - return 0; + return savedcmd ? 0 : -ENOMEM; } int is_tracing_stopped(void) @@ -5972,26 +5986,14 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) -{ - kfree(s->saved_cmdlines); - kfree(s->map_cmdline_to_pid); - kfree(s); -} - static int tracing_resize_saved_cmdlines(unsigned int val) { struct saved_cmdlines_buffer *s, *savedcmd_temp; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = allocate_cmdlines_buffer(val); if (!s) return -ENOMEM; - if (allocate_cmdlines_buffer(val, s) < 0) { - kfree(s); - return -ENOMEM; - } - preempt_disable(); arch_spin_lock(&trace_cmdline_lock); savedcmd_temp = savedcmd; @@ -8276,6 +8278,20 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, return size; } +static int tracing_buffers_flush(struct file *file, fl_owner_t id) +{ + struct ftrace_buffer_info *info = file->private_data; + struct trace_iterator *iter = &info->iter; + + iter->wait_index++; + /* Make sure the waiters see the new wait_index */ + smp_wmb(); + + ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); + + return 0; +} + static int tracing_buffers_release(struct inode *inode, struct file *file) { struct ftrace_buffer_info *info = file->private_data; @@ -8287,12 +8303,6 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) __trace_array_put(iter->tr); - iter->wait_index++; - /* Make sure the waiters see the new wait_index */ - smp_wmb(); - - ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); - if (info->spare) ring_buffer_free_read_page(iter->array_buffer->buffer, info->spare_cpu, info->spare); @@ -8506,6 +8516,7 @@ static const struct file_operations tracing_buffers_fops = { .read = tracing_buffers_read, .poll = tracing_buffers_poll, .release = tracing_buffers_release, + .flush = tracing_buffers_flush, .splice_read = tracing_buffers_splice_read, .unlocked_ioctl = tracing_buffers_ioctl, .llseek = no_llseek, diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 918730d74932..f941ce01ee35 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1455,8 +1455,10 @@ register_snapshot_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { - if (tracing_alloc_snapshot_instance(file->tr) != 0) - return 0; + int ret = tracing_alloc_snapshot_instance(file->tr); + + if (ret < 0) + return ret; return register_trigger(glob, data, file); } diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index c774e560f2f9..a4dcf0f24352 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -574,7 +574,12 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) } memcpy(elt->key, key, map->key_size); - entry->val = elt; + /* + * Ensure the initialization is visible and + * publish the elt. + */ + smp_wmb(); + WRITE_ONCE(entry->val, elt); atomic64_inc(&map->hits); return entry->val; diff --git a/lib/cmdline_kunit.c b/lib/cmdline_kunit.c index d4572dbc9145..705b82736be0 100644 --- a/lib/cmdline_kunit.c +++ b/lib/cmdline_kunit.c @@ -124,7 +124,7 @@ static void cmdline_do_one_range_test(struct kunit *test, const char *in, n, e[0], r[0]); p = memchr_inv(&r[1], 0, sizeof(r) - sizeof(r[0])); - KUNIT_EXPECT_PTR_EQ_MSG(test, p, NULL, "in test %u at %u out of bound", n, p - r); + KUNIT_EXPECT_PTR_EQ_MSG(test, p, NULL, "in test %u at %td out of bound", n, p - r); } static void cmdline_test_range(struct kunit *test) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index dacb80c22c4f..5dfa582dbadd 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -609,9 +609,8 @@ static void debug_objects_fill_pool(void) static void __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack) { - enum debug_obj_state state; + struct debug_obj *obj, o; struct debug_bucket *db; - struct debug_obj *obj; unsigned long flags; debug_objects_fill_pool(); @@ -632,24 +631,18 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: obj->state = ODEBUG_STATE_INIT; - break; - - case ODEBUG_STATE_ACTIVE: - state = obj->state; raw_spin_unlock_irqrestore(&db->lock, flags); - debug_print_object(obj, "init"); - debug_object_fixup(descr->fixup_init, addr, state); - return; - - case ODEBUG_STATE_DESTROYED: - raw_spin_unlock_irqrestore(&db->lock, flags); - debug_print_object(obj, "init"); return; default: break; } + o = *obj; raw_spin_unlock_irqrestore(&db->lock, flags); + debug_print_object(&o, "init"); + + if (o.state == ODEBUG_STATE_ACTIVE) + debug_object_fixup(descr->fixup_init, addr, o.state); } /** @@ -690,11 +683,9 @@ EXPORT_SYMBOL_GPL(debug_object_init_on_stack); int debug_object_activate(void *addr, const struct debug_obj_descr *descr) { struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr }; - enum debug_obj_state state; struct debug_bucket *db; struct debug_obj *obj; unsigned long flags; - int ret; if (!debug_objects_enabled) return 0; @@ -706,49 +697,38 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr) raw_spin_lock_irqsave(&db->lock, flags); obj = lookup_object_or_alloc(addr, db, descr, false, true); - if (likely(!IS_ERR_OR_NULL(obj))) { - bool print_object = false; - + if (unlikely(!obj)) { + raw_spin_unlock_irqrestore(&db->lock, flags); + debug_objects_oom(); + return 0; + } else if (likely(!IS_ERR(obj))) { switch (obj->state) { + case ODEBUG_STATE_ACTIVE: + case ODEBUG_STATE_DESTROYED: + o = *obj; + break; case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: obj->state = ODEBUG_STATE_ACTIVE; - ret = 0; - break; - - case ODEBUG_STATE_ACTIVE: - state = obj->state; - raw_spin_unlock_irqrestore(&db->lock, flags); - debug_print_object(obj, "activate"); - ret = debug_object_fixup(descr->fixup_activate, addr, state); - return ret ? 0 : -EINVAL; - - case ODEBUG_STATE_DESTROYED: - print_object = true; - ret = -EINVAL; - break; + fallthrough; default: - ret = 0; - break; + raw_spin_unlock_irqrestore(&db->lock, flags); + return 0; } - raw_spin_unlock_irqrestore(&db->lock, flags); - if (print_object) - debug_print_object(obj, "activate"); - return ret; } raw_spin_unlock_irqrestore(&db->lock, flags); - - /* If NULL the allocation has hit OOM */ - if (!obj) { - debug_objects_oom(); - return 0; - } - - /* Object is neither static nor tracked. It's not initialized */ debug_print_object(&o, "activate"); - ret = debug_object_fixup(descr->fixup_activate, addr, ODEBUG_STATE_NOTAVAILABLE); - return ret ? 0 : -EINVAL; + + switch (o.state) { + case ODEBUG_STATE_ACTIVE: + case ODEBUG_STATE_NOTAVAILABLE: + if (debug_object_fixup(descr->fixup_activate, addr, o.state)) + return 0; + fallthrough; + default: + return -EINVAL; + } } EXPORT_SYMBOL_GPL(debug_object_activate); @@ -759,10 +739,10 @@ EXPORT_SYMBOL_GPL(debug_object_activate); */ void debug_object_deactivate(void *addr, const struct debug_obj_descr *descr) { + struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr }; struct debug_bucket *db; struct debug_obj *obj; unsigned long flags; - bool print_object = false; if (!debug_objects_enabled) return; @@ -774,33 +754,24 @@ void debug_object_deactivate(void *addr, const struct debug_obj_descr *descr) obj = lookup_object(addr, db); if (obj) { switch (obj->state) { + case ODEBUG_STATE_DESTROYED: + break; case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: case ODEBUG_STATE_ACTIVE: - if (!obj->astate) - obj->state = ODEBUG_STATE_INACTIVE; - else - print_object = true; - break; - - case ODEBUG_STATE_DESTROYED: - print_object = true; - break; + if (obj->astate) + break; + obj->state = ODEBUG_STATE_INACTIVE; + fallthrough; default: - break; + raw_spin_unlock_irqrestore(&db->lock, flags); + return; } + o = *obj; } raw_spin_unlock_irqrestore(&db->lock, flags); - if (!obj) { - struct debug_obj o = { .object = addr, - .state = ODEBUG_STATE_NOTAVAILABLE, - .descr = descr }; - - debug_print_object(&o, "deactivate"); - } else if (print_object) { - debug_print_object(obj, "deactivate"); - } + debug_print_object(&o, "deactivate"); } EXPORT_SYMBOL_GPL(debug_object_deactivate); @@ -811,11 +782,9 @@ EXPORT_SYMBOL_GPL(debug_object_deactivate); */ void debug_object_destroy(void *addr, const struct debug_obj_descr *descr) { - enum debug_obj_state state; + struct debug_obj *obj, o; struct debug_bucket *db; - struct debug_obj *obj; unsigned long flags; - bool print_object = false; if (!debug_objects_enabled) return; @@ -825,32 +794,31 @@ void debug_object_destroy(void *addr, const struct debug_obj_descr *descr) raw_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); - if (!obj) - goto out_unlock; + if (!obj) { + raw_spin_unlock_irqrestore(&db->lock, flags); + return; + } switch (obj->state) { + case ODEBUG_STATE_ACTIVE: + case ODEBUG_STATE_DESTROYED: + break; case ODEBUG_STATE_NONE: case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: obj->state = ODEBUG_STATE_DESTROYED; - break; - case ODEBUG_STATE_ACTIVE: - state = obj->state; - raw_spin_unlock_irqrestore(&db->lock, flags); - debug_print_object(obj, "destroy"); - debug_object_fixup(descr->fixup_destroy, addr, state); - return; - - case ODEBUG_STATE_DESTROYED: - print_object = true; - break; + fallthrough; default: - break; + raw_spin_unlock_irqrestore(&db->lock, flags); + return; } -out_unlock: + + o = *obj; raw_spin_unlock_irqrestore(&db->lock, flags); - if (print_object) - debug_print_object(obj, "destroy"); + debug_print_object(&o, "destroy"); + + if (o.state == ODEBUG_STATE_ACTIVE) + debug_object_fixup(descr->fixup_destroy, addr, o.state); } EXPORT_SYMBOL_GPL(debug_object_destroy); @@ -861,9 +829,8 @@ EXPORT_SYMBOL_GPL(debug_object_destroy); */ void debug_object_free(void *addr, const struct debug_obj_descr *descr) { - enum debug_obj_state state; + struct debug_obj *obj, o; struct debug_bucket *db; - struct debug_obj *obj; unsigned long flags; if (!debug_objects_enabled) @@ -874,24 +841,26 @@ void debug_object_free(void *addr, const struct debug_obj_descr *descr) raw_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); - if (!obj) - goto out_unlock; + if (!obj) { + raw_spin_unlock_irqrestore(&db->lock, flags); + return; + } switch (obj->state) { case ODEBUG_STATE_ACTIVE: - state = obj->state; - raw_spin_unlock_irqrestore(&db->lock, flags); - debug_print_object(obj, "free"); - debug_object_fixup(descr->fixup_free, addr, state); - return; + break; default: hlist_del(&obj->node); raw_spin_unlock_irqrestore(&db->lock, flags); free_object(obj); return; } -out_unlock: + + o = *obj; raw_spin_unlock_irqrestore(&db->lock, flags); + debug_print_object(&o, "free"); + + debug_object_fixup(descr->fixup_free, addr, o.state); } EXPORT_SYMBOL_GPL(debug_object_free); @@ -943,10 +912,10 @@ void debug_object_active_state(void *addr, const struct debug_obj_descr *descr, unsigned int expect, unsigned int next) { + struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr }; struct debug_bucket *db; struct debug_obj *obj; unsigned long flags; - bool print_object = false; if (!debug_objects_enabled) return; @@ -959,28 +928,19 @@ debug_object_active_state(void *addr, const struct debug_obj_descr *descr, if (obj) { switch (obj->state) { case ODEBUG_STATE_ACTIVE: - if (obj->astate == expect) - obj->astate = next; - else - print_object = true; - break; - + if (obj->astate != expect) + break; + obj->astate = next; + raw_spin_unlock_irqrestore(&db->lock, flags); + return; default: - print_object = true; break; } + o = *obj; } raw_spin_unlock_irqrestore(&db->lock, flags); - if (!obj) { - struct debug_obj o = { .object = addr, - .state = ODEBUG_STATE_NOTAVAILABLE, - .descr = descr }; - - debug_print_object(&o, "active_state"); - } else if (print_object) { - debug_print_object(obj, "active_state"); - } + debug_print_object(&o, "active_state"); } EXPORT_SYMBOL_GPL(debug_object_active_state); @@ -988,12 +948,10 @@ EXPORT_SYMBOL_GPL(debug_object_active_state); static void __debug_check_no_obj_freed(const void *address, unsigned long size) { unsigned long flags, oaddr, saddr, eaddr, paddr, chunks; - const struct debug_obj_descr *descr; - enum debug_obj_state state; + int cnt, objs_checked = 0; + struct debug_obj *obj, o; struct debug_bucket *db; struct hlist_node *tmp; - struct debug_obj *obj; - int cnt, objs_checked = 0; saddr = (unsigned long) address; eaddr = saddr + size; @@ -1015,12 +973,10 @@ repeat: switch (obj->state) { case ODEBUG_STATE_ACTIVE: - descr = obj->descr; - state = obj->state; + o = *obj; raw_spin_unlock_irqrestore(&db->lock, flags); - debug_print_object(obj, "free"); - debug_object_fixup(descr->fixup_free, - (void *) oaddr, state); + debug_print_object(&o, "free"); + debug_object_fixup(o.descr->fixup_free, (void *)oaddr, o.state); goto repeat; default: hlist_del(&obj->node); diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c index 2b5cc70ac53f..dbedd99aa616 100644 --- a/lib/memcpy_kunit.c +++ b/lib/memcpy_kunit.c @@ -32,7 +32,7 @@ struct some_bytes { BUILD_BUG_ON(sizeof(instance.data) != 32); \ for (size_t i = 0; i < sizeof(instance.data); i++) { \ KUNIT_ASSERT_EQ_MSG(test, instance.data[i], v, \ - "line %d: '%s' not initialized to 0x%02x @ %d (saw 0x%02x)\n", \ + "line %d: '%s' not initialized to 0x%02x @ %zu (saw 0x%02x)\n", \ __LINE__, #instance, v, i, instance.data[i]); \ } \ } while (0) @@ -41,7 +41,7 @@ struct some_bytes { BUILD_BUG_ON(sizeof(one) != sizeof(two)); \ for (size_t i = 0; i < sizeof(one); i++) { \ KUNIT_EXPECT_EQ_MSG(test, one.data[i], two.data[i], \ - "line %d: %s.data[%d] (0x%02x) != %s.data[%d] (0x%02x)\n", \ + "line %d: %s.data[%zu] (0x%02x) != %s.data[%zu] (0x%02x)\n", \ __LINE__, #one, i, one.data[i], #two, i, two.data[i]); \ } \ kunit_info(test, "ok: " TEST_OP "() " name "\n"); \ diff --git a/lib/mpi/ec.c b/lib/mpi/ec.c index 40f5908e57a4..e16dca1e23d5 100644 --- a/lib/mpi/ec.c +++ b/lib/mpi/ec.c @@ -584,6 +584,9 @@ void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, ctx->a = mpi_copy(a); ctx->b = mpi_copy(b); + ctx->d = NULL; + ctx->t.two_inv_p = NULL; + ctx->t.p_barrett = use_barrett > 0 ? mpi_barrett_init(ctx->p, 0) : NULL; mpi_ec_get_reset(ctx); diff --git a/lib/nlattr.c b/lib/nlattr.c index dffd60e4065f..86344df0ccf7 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -30,6 +30,8 @@ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), + [NLA_BE16] = sizeof(__be16), + [NLA_BE32] = sizeof(__be32), }; static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { @@ -43,6 +45,8 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), + [NLA_BE16] = sizeof(__be16), + [NLA_BE32] = sizeof(__be32), }; /* diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c index ce39ce9f3526..2829ddb0e316 100644 --- a/lib/pci_iomap.c +++ b/lib/pci_iomap.c @@ -170,8 +170,8 @@ void pci_iounmap(struct pci_dev *dev, void __iomem *p) if (addr >= start && addr < start + IO_SPACE_LIMIT) return; - iounmap(p); #endif + iounmap(p); } EXPORT_SYMBOL(pci_iounmap); diff --git a/lib/test_blackhole_dev.c b/lib/test_blackhole_dev.c index 4c40580a99a3..f247089d63c0 100644 --- a/lib/test_blackhole_dev.c +++ b/lib/test_blackhole_dev.c @@ -29,7 +29,6 @@ static int __init test_blackholedev_init(void) { struct ipv6hdr *ip6h; struct sk_buff *skb; - struct ethhdr *ethh; struct udphdr *uh; int data_len; int ret; @@ -61,7 +60,7 @@ static int __init test_blackholedev_init(void) ip6h->saddr = in6addr_loopback; ip6h->daddr = in6addr_loopback; /* Ether */ - ethh = (struct ethhdr *)skb_push(skb, sizeof(struct ethhdr)); + skb_push(skb, sizeof(struct ethhdr)); skb_set_mac_header(skb, 0); skb->protocol = htons(ETH_P_IPV6); diff --git a/mm/compaction.c b/mm/compaction.c index 8238e83385a7..23af5f3b2cca 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2570,16 +2570,11 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, enum compact_priority prio, struct page **capture) { - int may_perform_io = (__force int)(gfp_mask & __GFP_IO); struct zoneref *z; struct zone *zone; enum compact_result rc = COMPACT_SKIPPED; - /* - * Check if the GFP flags allow compaction - GFP_NOIO is really - * tricky context because the migration might require IO - */ - if (!may_perform_io) + if (!gfp_compaction_allowed(gfp_mask)) return COMPACT_SKIPPED; trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio); diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index 63bdad20dbaf..98a678129b06 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -185,9 +185,21 @@ static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres) return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO); } +static void damon_lru_sort_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_lru_sort_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *hot_scheme, *cold_scheme; + struct damos *old_hot_scheme = NULL, *old_cold_scheme = NULL; unsigned int hot_thres, cold_thres; int err = 0; @@ -195,18 +207,35 @@ static int damon_lru_sort_apply_parameters(void) if (err) return err; + damon_for_each_scheme(scheme, ctx) { + if (!old_hot_scheme) { + old_hot_scheme = scheme; + continue; + } + old_cold_scheme = scheme; + } + hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) * hot_thres_access_freq / 1000; - scheme = damon_lru_sort_new_hot_scheme(hot_thres); - if (!scheme) + hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres); + if (!hot_scheme) return -ENOMEM; - damon_set_schemes(ctx, &scheme, 1); + if (old_hot_scheme) + damon_lru_sort_copy_quota_status(&hot_scheme->quota, + &old_hot_scheme->quota); cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval; - scheme = damon_lru_sort_new_cold_scheme(cold_thres); - if (!scheme) + cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres); + if (!cold_scheme) { + damon_destroy_scheme(hot_scheme); return -ENOMEM; - damon_add_scheme(ctx, scheme); + } + if (old_cold_scheme) + damon_lru_sort_copy_quota_status(&cold_scheme->quota, + &old_cold_scheme->quota); + + damon_set_schemes(ctx, &hot_scheme, 1); + damon_add_scheme(ctx, cold_scheme); return damon_set_region_biggest_system_ram_default(target, &monitor_region_start, diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 162c9b1ca00f..cc337e94acfd 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -141,9 +141,20 @@ static struct damos *damon_reclaim_new_scheme(void) &damon_reclaim_wmarks); } +static void damon_reclaim_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_reclaim_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *old_scheme; int err = 0; err = damon_set_attrs(ctx, &damon_reclaim_mon_attrs); @@ -154,6 +165,11 @@ static int damon_reclaim_apply_parameters(void) scheme = damon_reclaim_new_scheme(); if (!scheme) return -ENOMEM; + if (!list_empty(&ctx->schemes)) { + damon_for_each_scheme(old_scheme, ctx) + damon_reclaim_copy_quota_status(&scheme->quota, + &old_scheme->quota); + } damon_set_schemes(ctx, &scheme, 1); return damon_set_region_biggest_system_ram_default(target, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 59577946735b..9736e762184b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -607,6 +608,9 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, loff_t off_align = round_up(off, size); unsigned long len_pad, ret; + if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) + return 0; + if (off_end <= off_align || (off_end - off_align) < size) return 0; diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index 0d59098f0876..cef683a2e0d2 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -415,7 +415,8 @@ static void kmalloc_oob_16(struct kunit *test) /* This test is specifically crafted for the generic mode. */ KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); - ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); + /* RELOC_HIDE to prevent gcc from warning about short alloc */ + ptr1 = RELOC_HIDE(kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL), 0); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); diff --git a/mm/memblock.c b/mm/memblock.c index f71c8b0f8ac7..0070d6c2c997 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -200,8 +200,9 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) /* * Address comparison utilities */ -static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, - phys_addr_t base2, phys_addr_t size2) +unsigned long __init_memblock +memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, + phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9da98e3e71cf..4570d3e315cf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7517,9 +7517,13 @@ bool mem_cgroup_swap_full(struct folio *folio) static int __init setup_swap_account(char *s) { - pr_warn_once("The swapaccount= commandline option is deprecated. " - "Please report your usecase to linux-mm@kvack.org if you " - "depend on this functionality.\n"); + bool res; + + if (!kstrtobool(s, &res) && !res) + pr_warn_once("The swapaccount=0 commandline option is deprecated " + "in favor of configuring swap control via cgroupfs. " + "Please report your usecase to linux-mm@kvack.org if you " + "depend on this functionality.\n"); return 1; } __setup("swapaccount=", setup_swap_account); diff --git a/mm/memory.c b/mm/memory.c index fc8b264ec0ca..fb83cf56377a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3761,6 +3761,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) struct page *page; struct swap_info_struct *si = NULL; rmap_t rmap_flags = RMAP_NONE; + bool need_clear_cache = false; bool exclusive = false; swp_entry_t entry; pte_t pte; @@ -3822,6 +3823,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!folio) { if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { + /* + * Prevent parallel swapin from proceeding with + * the cache flag. Otherwise, another thread may + * finish swapin first, free the entry, and swapout + * reusing the same entry. It's undetectable as + * pte_same() returns true due to entry reuse. + */ + if (swapcache_prepare(entry)) { + /* Relax a bit to prevent rapid repeated page faults */ + schedule_timeout_uninterruptible(1); + goto out; + } + need_clear_cache = true; + /* skip swapcache */ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address, false); @@ -4073,6 +4088,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); out: + /* Clear the swap cache pin for direct swapin after PTL unlock */ + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; @@ -4086,6 +4104,8 @@ out_release: folio_unlock(swapcache); folio_put(swapcache); } + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; diff --git a/mm/memtest.c b/mm/memtest.c index f53ace709ccd..d407373f225b 100644 --- a/mm/memtest.c +++ b/mm/memtest.c @@ -46,10 +46,10 @@ static void __init memtest(u64 pattern, phys_addr_t start_phys, phys_addr_t size last_bad = 0; for (p = start; p < end; p++) - *p = pattern; + WRITE_ONCE(*p, pattern); for (p = start; p < end; p++, start_phys_aligned += incr) { - if (*p == pattern) + if (READ_ONCE(*p) == pattern) continue; if (start_phys_aligned == last_bad + incr) { last_bad += incr; diff --git a/mm/migrate.c b/mm/migrate.c index c93dd6a31c31..c5968021fde0 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -423,8 +423,12 @@ int folio_migrate_mapping(struct address_space *mapping, if (folio_test_swapbacked(folio)) { __folio_set_swapbacked(newfolio); if (folio_test_swapcache(folio)) { + int i; + folio_set_swapcache(newfolio); - newfolio->private = folio_get_private(folio); + for (i = 0; i < nr; i++) + set_page_private(folio_page(newfolio, i), + page_private(folio_page(folio, i))); } entries = nr; } else { diff --git a/mm/page-writeback.c b/mm/page-writeback.c index de5f69921b94..d3e9d12860b9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1526,7 +1526,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) */ dtc->wb_thresh = __wb_calc_thresh(dtc); dtc->wb_bg_thresh = dtc->thresh ? - div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; + div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; /* * In order to avoid the stacked BDI deadlock we need diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1e824bce3393..c1f831aefe7b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4927,14 +4927,9 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, else (*no_progress_loops)++; - /* - * Make sure we converge to OOM if we cannot make any progress - * several times in the row. - */ - if (*no_progress_loops > MAX_RECLAIM_RETRIES) { - /* Before OOM, exhaust highatomic_reserve */ - return unreserve_highatomic_pageblock(ac, true); - } + if (*no_progress_loops > MAX_RECLAIM_RETRIES) + goto out; + /* * Keep reclaiming pages while there is a chance this will lead @@ -4977,6 +4972,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, schedule_timeout_uninterruptible(1); else cond_resched(); +out: + /* Before OOM, exhaust highatomic_reserve */ + if (!ret) + return unreserve_highatomic_pageblock(ac, true); + return ret; } @@ -5018,6 +5018,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac) { bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM; + bool can_compact = gfp_compaction_allowed(gfp_mask); const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER; struct page *page = NULL; unsigned int alloc_flags; @@ -5096,7 +5097,7 @@ restart: * Don't try this for allocations that are allowed to ignore * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen. */ - if (can_direct_reclaim && + if (can_direct_reclaim && can_compact && (costly_order || (order > 0 && ac->migratetype != MIGRATE_MOVABLE)) && !gfp_pfmemalloc_allowed(gfp_mask)) { @@ -5194,9 +5195,10 @@ retry: /* * Do not retry costly high order allocations unless they are - * __GFP_RETRY_MAYFAIL + * __GFP_RETRY_MAYFAIL and we can compact */ - if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) + if (costly_order && (!can_compact || + !(gfp_mask & __GFP_RETRY_MAYFAIL))) goto nopage; if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, @@ -5209,7 +5211,7 @@ retry: * implementation of the compaction depends on the sufficient amount * of free memory (see __compaction_suitable) */ - if (did_some_progress > 0 && + if (did_some_progress > 0 && can_compact && should_compact_retry(ac, order, alloc_flags, compact_result, &compact_priority, &compaction_retries)) diff --git a/mm/readahead.c b/mm/readahead.c index 89d01b26d90b..08c500da6b85 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -504,7 +504,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, if (!folio) return -ENOMEM; - mark = round_up(mark, 1UL << order); + mark = round_down(mark, 1UL << order); if (index == mark) folio_set_readahead(folio); err = filemap_add_folio(ractl->mapping, folio, index, gfp); @@ -612,7 +612,7 @@ static void ondemand_readahead(struct readahead_control *ractl, * It's the expected callback index, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ - expected = round_up(ra->start + ra->size - ra->async_size, + expected = round_down(ra->start + ra->size - ra->async_size, 1UL << order); if (index == expected || index == (ra->start + ra->size)) { ra->start += ra->size; diff --git a/mm/sparse.c b/mm/sparse.c index e5a8a3a0edd7..05d1e7b6c6db 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -792,6 +792,13 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, if (empty) { unsigned long section_nr = pfn_to_section_nr(pfn); + /* + * Mark the section invalid so that valid_section() + * return false. This prevents code from dereferencing + * ms->usage array. + */ + ms->section_mem_map &= ~SECTION_HAS_MEM_MAP; + /* * When removing an early section, the usage map is kept (as the * usage maps of other sections fall into the same page). It @@ -800,16 +807,10 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, * was allocated during boot. */ if (!PageReserved(virt_to_page(ms->usage))) { - kfree(ms->usage); - ms->usage = NULL; + kfree_rcu(ms->usage, rcu); + WRITE_ONCE(ms->usage, NULL); } memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); - /* - * Mark the section invalid so that valid_section() - * return false. This prevents code from dereferencing - * ms->usage array. - */ - ms->section_mem_map &= ~SECTION_HAS_MEM_MAP; } /* diff --git a/mm/swap.h b/mm/swap.h index cc08c459c619..5eff40ef7693 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -39,6 +39,7 @@ void __delete_from_swap_cache(struct folio *folio, void delete_from_swap_cache(struct folio *folio); void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry); struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index); @@ -98,6 +99,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc) return 0; } +static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ +} + static inline struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr) { diff --git a/mm/swapfile.c b/mm/swapfile.c index 71db6d8a1ea3..0d6182db44a6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1222,6 +1222,18 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, } /* + * When we get a swap entry, if there aren't some other ways to + * prevent swapoff, such as the folio in swap cache is locked, page + * table lock is held, etc., the swap entry may become invalid because + * of swapoff. Then, we need to enclose all swap related functions + * with get_swap_device() and put_swap_device(), unless the swap + * functions call get/put_swap_device() by themselves. + * + * Note that when only holding the PTL, swapoff might succeed immediately + * after freeing a swap entry. Therefore, immediately after + * __swap_entry_free(), the swap info might become stale and should not + * be touched without a prior get_swap_device(). + * * Check whether swap entry is valid in the swap device. If so, * return pointer to swap_info_struct, and keep the swap entry valid * via preventing the swap device from being swapoff, until @@ -1230,9 +1242,8 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, * Notice that swapoff or swapoff+swapon can still happen before the * percpu_ref_tryget_live() in get_swap_device() or after the * percpu_ref_put() in put_swap_device() if there isn't any other way - * to prevent swapoff, such as page lock, page table lock, etc. The - * caller must be prepared for that. For example, the following - * situation is possible. + * to prevent swapoff. The caller must be prepared for that. For + * example, the following situation is possible. * * CPU1 CPU2 * do_swap_page() @@ -1624,13 +1635,19 @@ int free_swap_and_cache(swp_entry_t entry) if (non_swap_entry(entry)) return 1; - p = _swap_info_get(entry); + p = get_swap_device(entry); if (p) { + if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) { + put_swap_device(p); + return 0; + } + count = __swap_entry_free(p, entry); if (count == SWAP_HAS_CACHE && !swap_page_trans_huge_swapped(p, entry)) __try_to_reclaim_swap(p, swp_offset(entry), TTRS_UNMAPPED | TTRS_FULL); + put_swap_device(p); } return p != NULL; } @@ -3373,6 +3390,19 @@ int swapcache_prepare(swp_entry_t entry) return __swap_duplicate(entry, SWAP_HAS_CACHE); } +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + unsigned char usage; + + ci = lock_cluster_or_swap_info(si, offset); + usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); + unlock_cluster_or_swap_info(si, ci); + if (!usage) + free_swap_slot(entry); +} + struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 650ab6cfd5f4..992a0a16846f 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -327,6 +327,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, enum mcopy_atomic_mode mode, bool wp_copy) { @@ -445,6 +446,15 @@ retry: goto out; } mmap_read_lock(dst_mm); + /* + * If memory mappings are changing because of non-cooperative + * operation (e.g. mremap) running in parallel, bail out and + * request the user to retry later + */ + if (mmap_changing && atomic_read(mmap_changing)) { + err = -EAGAIN; + break; + } dst_vma = NULL; goto retry; @@ -480,6 +490,7 @@ extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, enum mcopy_atomic_mode mode, bool wp_copy); #endif /* CONFIG_HUGETLB_PAGE */ @@ -601,8 +612,8 @@ retry: */ if (is_vm_hugetlb_page(dst_vma)) return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start, - src_start, len, mcopy_mode, - wp_copy); + src_start, len, mmap_changing, + mcopy_mode, wp_copy); if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) goto out_unlock; diff --git a/mm/vmscan.c b/mm/vmscan.c index 9f3cfb7caa48..a3b1d8e5dbb3 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -6024,7 +6024,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) /* Use reclaim/compaction for costly allocs or under memory pressure */ static bool in_reclaim_compaction(struct scan_control *sc) { - if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && + if (gfp_compaction_allowed(sc->gfp_mask) && sc->order && (sc->order > PAGE_ALLOC_COSTLY_ORDER || sc->priority < DEF_PRIORITY - 2)) return true; @@ -6266,6 +6266,9 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) unsigned long watermark; enum compact_result suitable; + if (!gfp_compaction_allowed(sc->gfp_mask)) + return false; + suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx); if (suitable == COMPACT_SUCCESS) /* Allocation should succeed already. Don't reclaim. */ diff --git a/mm/zswap.c b/mm/zswap.c index b3829ada4a41..b7cb126797f9 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1013,6 +1013,8 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) if (zswap_rb_search(&tree->rbroot, entry->offset) != entry) { spin_unlock(&tree->lock); delete_from_swap_cache(page_folio(page)); + unlock_page(page); + put_page(page); ret = -ENOMEM; goto fail; } diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 214532173536..a3b68243fd4b 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -118,12 +118,16 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { + if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) + continue; m = nla_data(attr); vlan_dev_set_ingress_priority(dev, m->to, m->from); } } if (data[IFLA_VLAN_EGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { + if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) + continue; m = nla_data(attr); err = vlan_dev_set_egress_priority(dev, m->from, m->to); if (err) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f1b7510359e4..3f9ff02baafe 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -264,14 +264,11 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; - lock_sock(sk); - skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; - release_sock(sk); return err; } @@ -297,8 +294,6 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); - release_sock(sk); - if (flags & MSG_TRUNC) copied = skblen; @@ -521,10 +516,11 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (sk->sk_state == BT_LISTEN) return -EINVAL; - lock_sock(sk); + spin_lock(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); amount = skb ? skb->len : 0; - release_sock(sk); + spin_unlock(&sk->sk_receive_queue.lock); + err = put_user(amount, (int __user *)arg); break; diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 8a85f6cdfbc1..1bc51e2b05a3 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -13,48 +13,33 @@ #define PNP_INFO_SVCLASS_ID 0x1200 -static u8 eir_append_name(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len) -{ - u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1]; - - /* If data is already NULL terminated just pass it directly */ - if (data[data_len - 1] == '\0') - return eir_append_data(eir, eir_len, type, data, data_len); - - memcpy(name, data, HCI_MAX_SHORT_NAME_LENGTH); - name[HCI_MAX_SHORT_NAME_LENGTH] = '\0'; - - return eir_append_data(eir, eir_len, type, name, sizeof(name)); -} - u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { size_t short_len; size_t complete_len; - /* no space left for name (+ NULL + type + len) */ - if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) + /* no space left for name (+ type + len) */ + if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 2) return ad_len; /* use complete name if present and fits */ complete_len = strnlen(hdev->dev_name, sizeof(hdev->dev_name)); if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) - return eir_append_name(ptr, ad_len, EIR_NAME_COMPLETE, - hdev->dev_name, complete_len + 1); + return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, + hdev->dev_name, complete_len); /* use short name if present */ short_len = strnlen(hdev->short_name, sizeof(hdev->short_name)); if (short_len) - return eir_append_name(ptr, ad_len, EIR_NAME_SHORT, + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, hdev->short_name, - short_len == HCI_MAX_SHORT_NAME_LENGTH ? - short_len : short_len + 1); + short_len); /* use shortened full name if present, we already know that name * is longer then HCI_MAX_SHORT_NAME_LENGTH */ if (complete_len) - return eir_append_name(ptr, ad_len, EIR_NAME_SHORT, + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 12d36875358b..bac5a369d2be 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -175,57 +175,6 @@ static void hci_conn_cleanup(struct hci_conn *conn) hci_dev_put(hdev); } -static void le_scan_cleanup(struct work_struct *work) -{ - struct hci_conn *conn = container_of(work, struct hci_conn, - le_scan_cleanup); - struct hci_dev *hdev = conn->hdev; - struct hci_conn *c = NULL; - - BT_DBG("%s hcon %p", hdev->name, conn); - - hci_dev_lock(hdev); - - /* Check that the hci_conn is still around */ - rcu_read_lock(); - list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) { - if (c == conn) - break; - } - rcu_read_unlock(); - - if (c == conn) { - hci_connect_le_scan_cleanup(conn, 0x00); - hci_conn_cleanup(conn); - } - - hci_dev_unlock(hdev); - hci_dev_put(hdev); - hci_conn_put(conn); -} - -static void hci_connect_le_scan_remove(struct hci_conn *conn) -{ - BT_DBG("%s hcon %p", conn->hdev->name, conn); - - /* We can't call hci_conn_del/hci_conn_cleanup here since that - * could deadlock with another hci_conn_del() call that's holding - * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work). - * Instead, grab temporary extra references to the hci_dev and - * hci_conn and perform the necessary cleanup in a separate work - * callback. - */ - - hci_dev_hold(conn->hdev); - hci_conn_get(conn); - - /* Even though we hold a reference to the hdev, many other - * things might get cleaned up meanwhile, including the hdev's - * own workqueue, so we can't use that for scheduling. - */ - schedule_work(&conn->le_scan_cleanup); -} - static void hci_acl_create_connection(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; @@ -672,13 +621,6 @@ static void hci_conn_timeout(struct work_struct *work) if (refcnt > 0) return; - /* LE connections in scanning state need special handling */ - if (conn->state == BT_CONNECT && conn->type == LE_LINK && - test_bit(HCI_CONN_SCANNING, &conn->flags)) { - hci_connect_le_scan_remove(conn); - return; - } - hci_abort_conn(conn, hci_proto_disconn_ind(conn)); } @@ -1050,7 +992,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept); INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle); INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout); - INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup); atomic_set(&conn->refcnt, 0); @@ -2837,81 +2778,46 @@ u32 hci_conn_get_phy(struct hci_conn *conn) return phys; } -int hci_abort_conn(struct hci_conn *conn, u8 reason) +static int abort_conn_sync(struct hci_dev *hdev, void *data) { - int r = 0; + struct hci_conn *conn; + u16 handle = PTR_ERR(data); - if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) return 0; - switch (conn->state) { - case BT_CONNECTED: - case BT_CONFIG: - if (conn->type == AMP_LINK) { - struct hci_cp_disconn_phy_link cp; + return hci_abort_conn_sync(hdev, conn, conn->abort_reason); +} - cp.phy_handle = HCI_PHY_HANDLE(conn->handle); - cp.reason = reason; - r = hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK, - sizeof(cp), &cp); - } else { - struct hci_cp_disconnect dc; +int hci_abort_conn(struct hci_conn *conn, u8 reason) +{ + struct hci_dev *hdev = conn->hdev; - dc.handle = cpu_to_le16(conn->handle); - dc.reason = reason; - r = hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, - sizeof(dc), &dc); + /* If abort_reason has already been set it means the connection is + * already being aborted so don't attempt to overwrite it. + */ + if (conn->abort_reason) + return 0; + + bt_dev_dbg(hdev, "handle 0x%2.2x reason 0x%2.2x", conn->handle, reason); + + conn->abort_reason = reason; + + /* If the connection is pending check the command opcode since that + * might be blocking on hci_cmd_sync_work while waiting its respective + * event so we need to hci_cmd_sync_cancel to cancel it. + */ + if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) { + switch (hci_skb_event(hdev->sent_cmd)) { + case HCI_EV_LE_CONN_COMPLETE: + case HCI_EV_LE_ENHANCED_CONN_COMPLETE: + case HCI_EVT_LE_CIS_ESTABLISHED: + hci_cmd_sync_cancel(hdev, ECANCELED); + break; } - - conn->state = BT_DISCONN; - - break; - case BT_CONNECT: - if (conn->type == LE_LINK) { - if (test_bit(HCI_CONN_SCANNING, &conn->flags)) - break; - r = hci_send_cmd(conn->hdev, - HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); - } else if (conn->type == ACL_LINK) { - if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2) - break; - r = hci_send_cmd(conn->hdev, - HCI_OP_CREATE_CONN_CANCEL, - 6, &conn->dst); - } - break; - case BT_CONNECT2: - if (conn->type == ACL_LINK) { - struct hci_cp_reject_conn_req rej; - - bacpy(&rej.bdaddr, &conn->dst); - rej.reason = reason; - - r = hci_send_cmd(conn->hdev, - HCI_OP_REJECT_CONN_REQ, - sizeof(rej), &rej); - } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { - struct hci_cp_reject_sync_conn_req rej; - - bacpy(&rej.bdaddr, &conn->dst); - - /* SCO rejection has its own limited set of - * allowed error values (0x0D-0x0F) which isn't - * compatible with most values passed to this - * function. To be safe hard-code one of the - * values that's suitable for SCO. - */ - rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; - - r = hci_send_cmd(conn->hdev, - HCI_OP_REJECT_SYNC_CONN_REQ, - sizeof(rej), &rej); - } - break; - default: - conn->state = BT_CLOSED; - break; } - return r; + return hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle), + NULL); } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6a1db678d032..02e67ff05b7b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -908,7 +908,7 @@ int hci_get_dev_info(void __user *arg) else flags = hdev->flags; - strcpy(di.name, hdev->name); + strscpy(di.name, hdev->name, sizeof(di.name)); di.bdaddr = hdev->bdaddr; di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4); di.flags = flags; @@ -1049,6 +1049,7 @@ static void hci_error_reset(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + hci_dev_hold(hdev); BT_DBG("%s", hdev->name); if (hdev->hw_error) @@ -1056,10 +1057,10 @@ static void hci_error_reset(struct work_struct *work) else bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code); - if (hci_dev_do_close(hdev)) - return; + if (!hci_dev_do_close(hdev)) + hci_dev_do_open(hdev); - hci_dev_do_open(hdev); + hci_dev_put(hdev); } void hci_uuids_clear(struct hci_dev *hdev) @@ -1490,11 +1491,12 @@ static void hci_cmd_timeout(struct work_struct *work) struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_timer.work); - if (hdev->sent_cmd) { - struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; - u16 opcode = __le16_to_cpu(sent->opcode); + if (hdev->req_skb) { + u16 opcode = hci_skb_opcode(hdev->req_skb); bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); + + hci_cmd_sync_cancel_sync(hdev, ETIMEDOUT); } else { bt_dev_err(hdev, "command tx timeout"); } @@ -2790,6 +2792,7 @@ void hci_release_dev(struct hci_dev *hdev) ida_simple_remove(&hci_index_ida, hdev->id); kfree_skb(hdev->sent_cmd); + kfree_skb(hdev->req_skb); kfree_skb(hdev->recv_event); kfree(hdev); } @@ -2821,6 +2824,23 @@ int hci_unregister_suspend_notifier(struct hci_dev *hdev) return ret; } +/* Cancel ongoing command synchronously: + * + * - Cancel command timer + * - Reset command counter + * - Cancel command request + */ +static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err) +{ + bt_dev_dbg(hdev, "err 0x%2.2x", err); + + cancel_delayed_work_sync(&hdev->cmd_timer); + cancel_delayed_work_sync(&hdev->ncmd_timer); + atomic_set(&hdev->cmd_cnt, 1); + + hci_cmd_sync_cancel_sync(hdev, err); +} + /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) { @@ -2837,6 +2857,9 @@ int hci_suspend_dev(struct hci_dev *hdev) if (mgmt_powering_down(hdev)) return 0; + /* Cancel potentially blocking sync operation before suspend */ + hci_cancel_cmd_sync(hdev, EHOSTDOWN); + hci_req_sync_lock(hdev); ret = hci_suspend_sync(hdev); hci_req_sync_unlock(hdev); @@ -3099,21 +3122,33 @@ int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, EXPORT_SYMBOL(__hci_cmd_send); /* Get data from the previously sent command */ -void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) +static void *hci_cmd_data(struct sk_buff *skb, __u16 opcode) { struct hci_command_hdr *hdr; - if (!hdev->sent_cmd) + if (!skb || skb->len < HCI_COMMAND_HDR_SIZE) return NULL; - hdr = (void *) hdev->sent_cmd->data; + hdr = (void *)skb->data; if (hdr->opcode != cpu_to_le16(opcode)) return NULL; - BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); + return skb->data + HCI_COMMAND_HDR_SIZE; +} - return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE; +/* Get data from the previously sent command */ +void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) +{ + void *data; + + /* Check if opcode matches last sent command */ + data = hci_cmd_data(hdev->sent_cmd, opcode); + if (!data) + /* Check if opcode matches last request */ + data = hci_cmd_data(hdev->req_skb, opcode); + + return data; } /* Get data from last received event */ @@ -4009,17 +4044,19 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, if (!status && !hci_req_is_complete(hdev)) return; + skb = hdev->req_skb; + /* If this was the last command in a request the complete - * callback would be found in hdev->sent_cmd instead of the + * callback would be found in hdev->req_skb instead of the * command queue (hdev->cmd_q). */ - if (bt_cb(hdev->sent_cmd)->hci.req_flags & HCI_REQ_SKB) { - *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; + if (skb && bt_cb(skb)->hci.req_flags & HCI_REQ_SKB) { + *req_complete_skb = bt_cb(skb)->hci.req_complete_skb; return; } - if (bt_cb(hdev->sent_cmd)->hci.req_complete) { - *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; + if (skb && bt_cb(skb)->hci.req_complete) { + *req_complete = bt_cb(skb)->hci.req_complete; return; } @@ -4115,6 +4152,36 @@ static void hci_rx_work(struct work_struct *work) } } +static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) +{ + int err; + + bt_dev_dbg(hdev, "skb %p", skb); + + kfree_skb(hdev->sent_cmd); + + hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); + if (!hdev->sent_cmd) { + skb_queue_head(&hdev->cmd_q, skb); + queue_work(hdev->workqueue, &hdev->cmd_work); + return; + } + + err = hci_send_frame(hdev, skb); + if (err < 0) { + hci_cmd_sync_cancel_sync(hdev, -err); + return; + } + + if (hci_req_status_pend(hdev) && + !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { + kfree_skb(hdev->req_skb); + hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); + } + + atomic_dec(&hdev->cmd_cnt); +} + static void hci_cmd_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_work); @@ -4129,30 +4196,15 @@ static void hci_cmd_work(struct work_struct *work) if (!skb) return; - kfree_skb(hdev->sent_cmd); + hci_send_cmd_sync(hdev, skb); - hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); - if (hdev->sent_cmd) { - int res; - if (hci_req_status_pend(hdev)) - hci_dev_set_flag(hdev, HCI_CMD_PENDING); - atomic_dec(&hdev->cmd_cnt); - - res = hci_send_frame(hdev, skb); - if (res < 0) - __hci_cmd_sync_cancel(hdev, -res); - - rcu_read_lock(); - if (test_bit(HCI_RESET, &hdev->flags) || - hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) - cancel_delayed_work(&hdev->cmd_timer); - else - queue_delayed_work(hdev->workqueue, &hdev->cmd_timer, - HCI_CMD_TIMEOUT); - rcu_read_unlock(); - } else { - skb_queue_head(&hdev->cmd_q, skb); - queue_work(hdev->workqueue, &hdev->cmd_work); - } + rcu_read_lock(); + if (test_bit(HCI_RESET, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) + cancel_delayed_work(&hdev->cmd_timer); + else + queue_delayed_work(hdev->workqueue, &hdev->cmd_timer, + HCI_CMD_TIMEOUT); + rcu_read_unlock(); } } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 56ecc5f97b91..b150dee88f35 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1761,7 +1761,7 @@ static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr, { struct discovery_state *d = &hdev->discovery; - if (len > HCI_MAX_AD_LENGTH) + if (len > max_adv_len(hdev)) return; bacpy(&d->last_adv_addr, bdaddr); @@ -3567,8 +3567,6 @@ static void hci_remote_name_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); - hci_conn_check_pending(hdev); - hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); @@ -4331,7 +4329,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, void *data, * (since for this kind of commands there will not be a command * complete event). */ - if (ev->status || (hdev->sent_cmd && !hci_skb_event(hdev->sent_cmd))) { + if (ev->status || (hdev->req_skb && !hci_skb_event(hdev->req_skb))) { hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, req_complete_skb); if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { @@ -5282,9 +5280,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn || !hci_conn_ssp_enabled(conn)) + if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) goto unlock; + /* Assume remote supports SSP since it has triggered this event */ + set_bit(HCI_CONN_SSP_ENABLED, &conn->flags); + hci_conn_hold(conn); if (!hci_dev_test_flag(hdev, HCI_MGMT)) @@ -6239,8 +6240,9 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, return; } - if (!ext_adv && len > HCI_MAX_AD_LENGTH) { - bt_dev_err_ratelimited(hdev, "legacy adv larger than 31 bytes"); + if (len > max_adv_len(hdev)) { + bt_dev_err_ratelimited(hdev, + "adv larger than maximum supported"); return; } @@ -6305,7 +6307,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, */ conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved, type); - if (!ext_adv && conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) { + if (!ext_adv && conn && type == LE_ADV_IND && + len <= max_adv_len(hdev)) { /* Store report for later inclusion by * mgmt_device_connected */ @@ -6446,7 +6449,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, info->length + 1)) break; - if (info->length <= HCI_MAX_AD_LENGTH) { + if (info->length <= max_adv_len(hdev)) { rssi = info->data[info->length]; process_adv_report(hdev, info->type, &info->bdaddr, info->bdaddr_type, NULL, 0, rssi, @@ -6716,6 +6719,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_UNKNOWN_CONN_ID); + if (max > hcon->le_conn_max_interval) + return send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); + if (hci_check_conn_params(min, max, latency, timeout)) return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_INVALID_LL_PARAMS); @@ -7142,10 +7149,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "subevent 0x%2.2x", ev->subevent); /* Only match event if command OGF is for LE */ - if (hdev->sent_cmd && - hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) == 0x08 && - hci_skb_event(hdev->sent_cmd) == ev->subevent) { - *opcode = hci_skb_opcode(hdev->sent_cmd); + if (hdev->req_skb && + hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) == 0x08 && + hci_skb_event(hdev->req_skb) == ev->subevent) { + *opcode = hci_skb_opcode(hdev->req_skb); hci_req_cmd_complete(hdev, *opcode, 0x00, req_complete, req_complete_skb); } @@ -7245,10 +7252,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event, * keep track of the bdaddr of the connection event that woke us up. */ if (event == HCI_EV_CONN_REQUEST) { - bacpy(&hdev->wake_addr, &conn_complete->bdaddr); + bacpy(&hdev->wake_addr, &conn_request->bdaddr); hdev->wake_addr_type = BDADDR_BREDR; } else if (event == HCI_EV_CONN_COMPLETE) { - bacpy(&hdev->wake_addr, &conn_request->bdaddr); + bacpy(&hdev->wake_addr, &conn_complete->bdaddr); hdev->wake_addr_type = BDADDR_BREDR; } else if (event == HCI_EV_LE_META) { struct hci_ev_le_meta *le_ev = (void *)skb->data; @@ -7532,10 +7539,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) } /* Only match event if command OGF is not for LE */ - if (hdev->sent_cmd && - hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) != 0x08 && - hci_skb_event(hdev->sent_cmd) == event) { - hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->sent_cmd), + if (hdev->req_skb && + hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) != 0x08 && + hci_skb_event(hdev->req_skb) == event) { + hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->req_skb), status, &req_complete, &req_complete_skb); req_evt = event; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index f7e006a36382..4468647df672 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -916,7 +916,7 @@ void hci_request_setup(struct hci_dev *hdev) void hci_request_cancel_all(struct hci_dev *hdev) { - __hci_cmd_sync_cancel(hdev, ENODEV); + hci_cmd_sync_cancel_sync(hdev, ENODEV); cancel_interleave_scan(hdev); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d74fe13f3dce..7e64cf880f9f 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -31,6 +31,10 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; + /* Free the request command so it is not used as response */ + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; + if (skb) { struct sock *sk = hci_skb_sk(skb); @@ -38,7 +42,7 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, if (sk) sock_put(sk); - hdev->req_skb = skb_get(skb); + hdev->req_rsp = skb_get(skb); } wake_up_interruptible(&hdev->req_wait_q); @@ -186,8 +190,8 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, hdev->req_status = 0; hdev->req_result = 0; - skb = hdev->req_skb; - hdev->req_skb = NULL; + skb = hdev->req_rsp; + hdev->req_rsp = NULL; bt_dev_dbg(hdev, "end: err %d", err); @@ -651,22 +655,6 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) mutex_unlock(&hdev->cmd_sync_work_lock); } -void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err) -{ - bt_dev_dbg(hdev, "err 0x%2.2x", err); - - if (hdev->req_status == HCI_REQ_PEND) { - hdev->req_result = err; - hdev->req_status = HCI_REQ_CANCELED; - - cancel_delayed_work_sync(&hdev->cmd_timer); - cancel_delayed_work_sync(&hdev->ncmd_timer); - atomic_set(&hdev->cmd_cnt, 1); - - wake_up_interruptible(&hdev->req_wait_q); - } -} - void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); @@ -680,8 +668,33 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) } EXPORT_SYMBOL(hci_cmd_sync_cancel); -int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, - void *data, hci_cmd_sync_work_destroy_t destroy) +/* Cancel ongoing command request synchronously: + * + * - Set result and mark status to HCI_REQ_CANCELED + * - Wakeup command sync thread + */ +void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) +{ + bt_dev_dbg(hdev, "err 0x%2.2x", err); + + if (hdev->req_status == HCI_REQ_PEND) { + /* req_result is __u32 so error must be positive to be properly + * propagated. + */ + hdev->req_result = err < 0 ? -err : err; + hdev->req_status = HCI_REQ_CANCELED; + + wake_up_interruptible(&hdev->req_wait_q); + } +} +EXPORT_SYMBOL(hci_cmd_sync_cancel_sync); + +/* Submit HCI command to be run in as cmd_sync_work: + * + * - hdev must _not_ be unregistered + */ +int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry; int err = 0; @@ -711,6 +724,23 @@ unlock: mutex_unlock(&hdev->unregister_lock); return err; } +EXPORT_SYMBOL(hci_cmd_sync_submit); + +/* Queue HCI command: + * + * - hdev must be running + */ +int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + /* Only queue command if hdev is running which means it had been opened + * and is either on init phase or is already up. + */ + if (!test_bit(HCI_RUNNING, &hdev->flags)) + return -ENETDOWN; + + return hci_cmd_sync_submit(hdev, func, data, destroy); +} EXPORT_SYMBOL(hci_cmd_sync_queue); int hci_update_eir_sync(struct hci_dev *hdev) @@ -2251,8 +2281,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev, /* During suspend, only wakeable devices can be in acceptlist */ if (hdev->suspended && - !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) + !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) { + hci_le_del_accept_list_sync(hdev, ¶ms->addr, + params->addr_type); return 0; + } /* Select filter policy to accept all advertising */ if (*num_entries >= hdev->le_accept_list_size) @@ -3780,12 +3813,14 @@ static int hci_set_event_mask_sync(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) { events[4] |= 0x01; /* Flow Specification Complete */ - /* Don't set Disconnect Complete when suspended as that - * would wakeup the host when disconnecting due to - * suspend. + /* Don't set Disconnect Complete and mode change when + * suspended as that would wakeup the host when disconnecting + * due to suspend. */ - if (hdev->suspended) + if (hdev->suspended) { events[0] &= 0xef; + events[2] &= 0xf7; + } } else { /* Use a different default for LE-only devices */ memset(events, 0, sizeof(events)); @@ -4851,6 +4886,11 @@ int hci_dev_open_sync(struct hci_dev *hdev) hdev->sent_cmd = NULL; } + if (hdev->req_skb) { + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; + } + clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); @@ -5012,6 +5052,12 @@ int hci_dev_close_sync(struct hci_dev *hdev) hdev->sent_cmd = NULL; } + /* Drop last request */ + if (hdev->req_skb) { + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; + } + clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); @@ -5204,22 +5250,27 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, } static int hci_le_connect_cancel_sync(struct hci_dev *hdev, - struct hci_conn *conn) + struct hci_conn *conn, u8 reason) { + /* Return reason if scanning since the connection shall probably be + * cleanup directly. + */ if (test_bit(HCI_CONN_SCANNING, &conn->flags)) - return 0; + return reason; - if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) + if (conn->role == HCI_ROLE_SLAVE || + test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); } -static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn) +static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn, + u8 reason) { if (conn->type == LE_LINK) - return hci_le_connect_cancel_sync(hdev, conn); + return hci_le_connect_cancel_sync(hdev, conn, reason); if (hdev->hci_ver < BLUETOOTH_VER_1_2) return 0; @@ -5272,9 +5323,11 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) case BT_CONFIG: return hci_disconnect_sync(hdev, conn, reason); case BT_CONNECT: - err = hci_connect_cancel_sync(hdev, conn); + err = hci_connect_cancel_sync(hdev, conn, reason); /* Cleanup hci_conn object if it cannot be cancelled as it - * likelly means the controller and host stack are out of sync. + * likelly means the controller and host stack are out of sync + * or in case of LE it was still scanning so it can be cleanup + * safely. */ if (err) { hci_dev_lock(hdev); @@ -5480,7 +5533,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length) bt_dev_dbg(hdev, ""); - if (hci_dev_test_flag(hdev, HCI_INQUIRY)) + if (test_bit(HCI_INQUIRY, &hdev->flags)) return 0; hci_dev_lock(hdev); @@ -6189,7 +6242,7 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn) done: if (err == -ETIMEDOUT) - hci_le_connect_cancel_sync(hdev, conn); + hci_le_connect_cancel_sync(hdev, conn, 0x00); /* Re-enable advertising after the connection attempt is finished. */ hci_resume_advertising_sync(hdev); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4c5793053393..b4cba55be5ad 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5614,7 +5614,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - err = hci_check_conn_params(min, max, latency, to_multiplier); + if (max > hcon->le_conn_max_interval) { + BT_DBG("requested connection interval exceeds current bounds."); + err = -EINVAL; + } else { + err = hci_check_conn_params(min, max, latency, to_multiplier); + } + if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else @@ -6527,7 +6533,8 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, if (len > skb->len || !cmd->ident) { BT_DBG("corrupted command"); l2cap_sig_send_rej(conn, cmd->ident); - break; + skb_pull(skb, len > skb->len ? skb->len : len); + continue; } err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6d631a2e6016..716f6dc4934b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1039,6 +1039,8 @@ static void rpa_expired(struct work_struct *work) hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL); } +static int set_discoverable_sync(struct hci_dev *hdev, void *data); + static void discov_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -1057,7 +1059,7 @@ static void discov_off(struct work_struct *work) hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hdev->discov_timeout = 0; - hci_update_discoverable(hdev); + hci_cmd_sync_queue(hdev, set_discoverable_sync, NULL, NULL); mgmt_new_settings(hdev); @@ -1399,8 +1401,16 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, - mgmt_set_powered_complete); + /* Cancel potentially blocking sync operation before power off */ + if (cp->val == 0x00) { + hci_cmd_sync_cancel_sync(hdev, -EHOSTDOWN); + err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, + mgmt_set_powered_complete); + } else { + /* Use hci_cmd_sync_submit since hdev might not be running */ + err = hci_cmd_sync_submit(hdev, set_powered_sync, cmd, + mgmt_set_powered_complete); + } if (err < 0) mgmt_pending_remove(cmd); @@ -3573,18 +3583,6 @@ unlock: return err; } -static int abort_conn_sync(struct hci_dev *hdev, void *data) -{ - struct hci_conn *conn; - u16 handle = PTR_ERR(data); - - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) - return 0; - - return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM); -} - static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -3635,8 +3633,7 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, le_addr_type(addr->type)); if (conn->conn_reason == CONN_REASON_PAIR_DEVICE) - hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle), - NULL); + hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); unlock: hci_dev_unlock(hdev); @@ -5381,9 +5378,9 @@ static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count, for (i = 0; i < pattern_count; i++) { offset = patterns[i].offset; length = patterns[i].length; - if (offset >= HCI_MAX_AD_LENGTH || - length > HCI_MAX_AD_LENGTH || - (offset + length) > HCI_MAX_AD_LENGTH) + if (offset >= HCI_MAX_EXT_AD_LENGTH || + length > HCI_MAX_EXT_AD_LENGTH || + (offset + length) > HCI_MAX_EXT_AD_LENGTH) return MGMT_STATUS_INVALID_PARAMS; p = kmalloc(sizeof(*p), GFP_KERNEL); @@ -8439,8 +8436,8 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, supported_flags = get_supported_adv_flags(hdev); rp->supported_flags = cpu_to_le32(supported_flags); - rp->max_adv_data_len = HCI_MAX_AD_LENGTH; - rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH; + rp->max_adv_data_len = max_adv_len(hdev); + rp->max_scan_rsp_len = max_adv_len(hdev); rp->max_instances = hdev->le_num_of_adv_sets; rp->num_instances = hdev->adv_instance_cnt; @@ -8468,7 +8465,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, static u8 calculate_name_len(struct hci_dev *hdev) { - u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3]; + u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 2]; /* len + type + name */ return eir_append_local_name(hdev, buf, 0); } @@ -8476,7 +8473,7 @@ static u8 calculate_name_len(struct hci_dev *hdev) static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags, bool is_adv_data) { - u8 max_len = HCI_MAX_AD_LENGTH; + u8 max_len = max_adv_len(hdev); if (is_adv_data) { if (adv_flags & (MGMT_ADV_FLAG_DISCOV | @@ -9764,14 +9761,6 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, struct mgmt_ev_device_disconnected ev; struct sock *sk = NULL; - /* The connection is still in hci_conn_hash so test for 1 - * instead of 0 to know if this is the last one. - */ - if (mgmt_powering_down(hdev) && hci_conn_count(hdev) == 1) { - cancel_delayed_work(&hdev->power_off); - queue_work(hdev->req_workqueue, &hdev->power_off.work); - } - if (!mgmt_connected) return; @@ -9828,14 +9817,6 @@ void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, { struct mgmt_ev_connect_failed ev; - /* The connection is still in hci_conn_hash so test for 1 - * instead of 0 to know if this is the last one. - */ - if (mgmt_powering_down(hdev) && hci_conn_count(hdev) == 1) { - cancel_delayed_work(&hdev->power_off); - queue_work(hdev->req_workqueue, &hdev->power_off.work); - } - bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(link_type, addr_type); ev.status = mgmt_status(status); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 8d6fce9005bd..4f54c7df3a94 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1937,7 +1937,7 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) /* Get data directly from socket receive queue without copying it. */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); - if (!skb_linearize(skb)) { + if (!skb_linearize(skb) && sk->sk_state != BT_CLOSED) { s = rfcomm_recv_frame(s, skb); if (!s) break; diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c index 5c4c369f8536..2faab44652e7 100644 --- a/net/bridge/br_cfm_netlink.c +++ b/net/bridge/br_cfm_netlink.c @@ -362,7 +362,7 @@ static int br_cc_ccm_tx_parse(struct net_bridge *br, struct nlattr *attr, memset(&tx_info, 0, sizeof(tx_info)); - instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]); + instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]); nla_memcpy(&tx_info.dmac.addr, tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC], sizeof(tx_info.dmac.addr)); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index db4f2641d1cd..9765f9f9bf7f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1607,6 +1607,10 @@ static void br_ip6_multicast_querier_expired(struct timer_list *t) } #endif +static void br_multicast_query_delay_expired(struct timer_list *t) +{ +} + static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx, struct br_ip *ip, struct sk_buff *skb) @@ -3024,7 +3028,7 @@ br_multicast_update_query_timer(struct net_bridge_mcast *brmctx, unsigned long max_delay) { if (!timer_pending(&query->timer)) - query->delay_time = jiffies + max_delay; + mod_timer(&query->delay_timer, jiffies + max_delay); mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval); } @@ -3867,13 +3871,11 @@ void br_multicast_ctx_init(struct net_bridge *br, brmctx->multicast_querier_interval = 255 * HZ; brmctx->multicast_membership_interval = 260 * HZ; - brmctx->ip4_other_query.delay_time = 0; brmctx->ip4_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock); brmctx->multicast_igmp_version = 2; #if IS_ENABLED(CONFIG_IPV6) brmctx->multicast_mld_version = 1; - brmctx->ip6_other_query.delay_time = 0; brmctx->ip6_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock); #endif @@ -3882,6 +3884,8 @@ void br_multicast_ctx_init(struct net_bridge *br, br_ip4_multicast_local_router_expired, 0); timer_setup(&brmctx->ip4_other_query.timer, br_ip4_multicast_querier_expired, 0); + timer_setup(&brmctx->ip4_other_query.delay_timer, + br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip4_own_query.timer, br_ip4_multicast_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) @@ -3889,6 +3893,8 @@ void br_multicast_ctx_init(struct net_bridge *br, br_ip6_multicast_local_router_expired, 0); timer_setup(&brmctx->ip6_other_query.timer, br_ip6_multicast_querier_expired, 0); + timer_setup(&brmctx->ip6_other_query.delay_timer, + br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip6_own_query.timer, br_ip6_multicast_query_expired, 0); #endif @@ -4023,10 +4029,12 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx) { del_timer_sync(&brmctx->ip4_mc_router_timer); del_timer_sync(&brmctx->ip4_other_query.timer); + del_timer_sync(&brmctx->ip4_other_query.delay_timer); del_timer_sync(&brmctx->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) del_timer_sync(&brmctx->ip6_mc_router_timer); del_timer_sync(&brmctx->ip6_other_query.timer); + del_timer_sync(&brmctx->ip6_other_query.delay_timer); del_timer_sync(&brmctx->ip6_own_query.timer); #endif } @@ -4469,13 +4477,15 @@ int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val) max_delay = brmctx->multicast_query_response_interval; if (!timer_pending(&brmctx->ip4_other_query.timer)) - brmctx->ip4_other_query.delay_time = jiffies + max_delay; + mod_timer(&brmctx->ip4_other_query.delay_timer, + jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) if (!timer_pending(&brmctx->ip6_other_query.timer)) - brmctx->ip6_other_query.delay_time = jiffies + max_delay; + mod_timer(&brmctx->ip6_other_query.delay_timer, + jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip6_own_query); #endif diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 202ad43e35d6..bff48d576363 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -43,6 +43,10 @@ #include #endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include +#endif + static unsigned int brnf_net_id __read_mostly; struct brnf_net { @@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv, return NF_STOLEN; } +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +/* conntracks' nf_confirm logic cannot handle cloned skbs referencing + * the same nf_conn entry, which will happen for multicast (broadcast) + * Frames on bridges. + * + * Example: + * macvlan0 + * br0 + * ethX ethY + * + * ethX (or Y) receives multicast or broadcast packet containing + * an IP packet, not yet in conntrack table. + * + * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. + * -> skb->_nfct now references a unconfirmed entry + * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge + * interface. + * 3. skb gets passed up the stack. + * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb + * and schedules a work queue to send them out on the lower devices. + * + * The clone skb->_nfct is not a copy, it is the same entry as the + * original skb. The macvlan rx handler then returns RX_HANDLER_PASS. + * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. + * + * The Macvlan broadcast worker and normal confirm path will race. + * + * This race will not happen if step 2 already confirmed a clone. In that + * case later steps perform skb_clone() with skb->_nfct already confirmed (in + * hash table). This works fine. + * + * But such confirmation won't happen when eb/ip/nftables rules dropped the + * packets before they reached the nf_confirm step in postrouting. + * + * Work around this problem by explicit confirmation of the entry at + * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed + * entry. + * + */ +static unsigned int br_nf_local_in(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_conntrack *nfct = skb_nfct(skb); + const struct nf_ct_hook *ct_hook; + struct nf_conn *ct; + int ret; + + if (!nfct || skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + ct = container_of(nfct, struct nf_conn, ct_general); + if (likely(nf_ct_is_confirmed(ct))) + return NF_ACCEPT; + + WARN_ON_ONCE(skb_shared(skb)); + WARN_ON_ONCE(refcount_read(&nfct->use) != 1); + + /* We can't call nf_confirm here, it would create a dependency + * on nf_conntrack module. + */ + ct_hook = rcu_dereference(nf_ct_hook); + if (!ct_hook) { + skb->_nfct = 0ul; + nf_conntrack_put(nfct); + return NF_ACCEPT; + } + + nf_bridge_pull_encap_header(skb); + ret = ct_hook->confirm(skb); + switch (ret & NF_VERDICT_MASK) { + case NF_STOLEN: + return NF_STOLEN; + default: + nf_bridge_push_encap_header(skb); + break; + } + + ct = container_of(nfct, struct nf_conn, ct_general); + WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); + + return ret; +} +#endif /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -962,6 +1050,14 @@ static const struct nf_hook_ops br_nf_ops[] = { .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + { + .hook = br_nf_local_in, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_BR_PRI_LAST, + }, +#endif { .hook = br_nf_forward_ip, .pf = NFPROTO_BRIDGE, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 06e5f6faa431..51d010f64e06 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -77,7 +77,7 @@ struct bridge_mcast_own_query { /* other querier */ struct bridge_mcast_other_query { struct timer_list timer; - unsigned long delay_time; + struct timer_list delay_timer; }; /* selected querier */ @@ -1083,7 +1083,7 @@ __br_multicast_querier_exists(struct net_bridge_mcast *brmctx, own_querier_enabled = false; } - return time_is_before_jiffies(querier->delay_time) && + return !timer_pending(&querier->delay_timer) && (own_querier_enabled || timer_pending(&querier->timer)); } diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 4b3982c368b3..b61ef2dff7a4 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -593,21 +593,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, } static int br_switchdev_mdb_queue_one(struct list_head *mdb_list, + struct net_device *dev, + unsigned long action, enum switchdev_obj_id id, const struct net_bridge_mdb_entry *mp, struct net_device *orig_dev) { - struct switchdev_obj_port_mdb *mdb; + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = id, + .orig_dev = orig_dev, + }, + }; + struct switchdev_obj_port_mdb *pmdb; - mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC); - if (!mdb) + br_switchdev_mdb_populate(&mdb, mp); + + if (action == SWITCHDEV_PORT_OBJ_ADD && + switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) { + /* This event is already in the deferred queue of + * events, so this replay must be elided, lest the + * driver receives duplicate events for it. This can + * only happen when replaying additions, since + * modifications are always immediately visible in + * br->mdb_list, whereas actual event delivery may be + * delayed. + */ + return 0; + } + + pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC); + if (!pmdb) return -ENOMEM; - mdb->obj.id = id; - mdb->obj.orig_dev = orig_dev; - br_switchdev_mdb_populate(mdb, mp); - list_add_tail(&mdb->obj.list, mdb_list); - + list_add_tail(&pmdb->obj.list, mdb_list); return 0; } @@ -675,51 +694,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev, if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) return 0; - /* We cannot walk over br->mdb_list protected just by the rtnl_mutex, - * because the write-side protection is br->multicast_lock. But we - * need to emulate the [ blocking ] calling context of a regular - * switchdev event, so since both br->multicast_lock and RCU read side - * critical sections are atomic, we have no choice but to pick the RCU - * read side lock, queue up all our events, leave the critical section - * and notify switchdev from blocking context. - */ - rcu_read_lock(); + if (adding) + action = SWITCHDEV_PORT_OBJ_ADD; + else + action = SWITCHDEV_PORT_OBJ_DEL; - hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { + /* br_switchdev_mdb_queue_one() will take care to not queue a + * replay of an event that is already pending in the switchdev + * deferred queue. In order to safely determine that, there + * must be no new deferred MDB notifications enqueued for the + * duration of the MDB scan. Therefore, grab the write-side + * lock to avoid racing with any concurrent IGMP/MLD snooping. + */ + spin_lock_bh(&br->multicast_lock); + + hlist_for_each_entry(mp, &br->mdb_list, mdb_node) { struct net_bridge_port_group __rcu * const *pp; const struct net_bridge_port_group *p; if (mp->host_joined) { - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_HOST_MDB, mp, br_dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } - for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->key.port->dev != dev) continue; - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_PORT_MDB, mp, dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } } - rcu_read_unlock(); - - if (adding) - action = SWITCHDEV_PORT_OBJ_ADD; - else - action = SWITCHDEV_PORT_OBJ_DEL; + spin_unlock_bh(&br->multicast_lock); list_for_each_entry(obj, &mdb_list, list) { err = br_switchdev_mdb_replay_one(nb, dev, @@ -780,6 +798,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL); + + /* Make sure that the device leaving this bridge has seen all + * relevant events before it is disassociated. In the normal + * case, when the device is directly attached to the bridge, + * this is covered by del_nbp(). If the association was indirect + * however, e.g. via a team or bond, and the device is leaving + * that intermediate device, then the bridge port remains in + * place. + */ + switchdev_deferred_process(); } /* Let the bridge know that this port is offloaded, so that it can assign a diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 06d94b2c6b5d..c7c27ada6704 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, return nf_conntrack_in(skb, &bridge_state); } +static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + if (skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + /* nf_conntrack_confirm() cannot handle concurrent clones, + * this happens for broad/multicast frames with e.g. macvlan on top + * of the bridge device. + */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) + return NF_ACCEPT; + + /* let inet prerouting call conntrack again */ + skb->_nfct = 0; + nf_ct_put(ct); + + return NF_ACCEPT; +} + static void nf_ct_bridge_frag_save(struct sk_buff *skb, struct nf_bridge_frag_data *data) { @@ -415,6 +439,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { .hooknum = NF_BR_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK, }, + { + .hook = nf_ct_bridge_in, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, + }, { .hook = nf_ct_bridge_post, .pf = NFPROTO_BRIDGE, diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 16af1a7f80f6..31a93cae5111 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -86,7 +86,7 @@ struct j1939_priv { unsigned int tp_max_packet_size; /* lock for j1939_socks list */ - spinlock_t j1939_socks_lock; + rwlock_t j1939_socks_lock; struct list_head j1939_socks; struct kref rx_kref; @@ -301,6 +301,7 @@ struct j1939_sock { int ifindex; struct j1939_addr addr; + spinlock_t filters_lock; struct j1939_filter *filters; int nfilters; pgn_t pgn_rx_filter; diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index ecff1c947d68..a6fb89fa6278 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) return ERR_PTR(-ENOMEM); j1939_tp_init(priv); - spin_lock_init(&priv->j1939_socks_lock); + rwlock_init(&priv->j1939_socks_lock); INIT_LIST_HEAD(&priv->j1939_socks); mutex_lock(&j1939_netdev_lock); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index b0be23559243..58909b36561a 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk) jsk->state |= J1939_SOCK_BOUND; j1939_priv_get(priv); - spin_lock_bh(&priv->j1939_socks_lock); + write_lock_bh(&priv->j1939_socks_lock); list_add_tail(&jsk->list, &priv->j1939_socks); - spin_unlock_bh(&priv->j1939_socks_lock); + write_unlock_bh(&priv->j1939_socks_lock); } static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk) { - spin_lock_bh(&priv->j1939_socks_lock); + write_lock_bh(&priv->j1939_socks_lock); list_del_init(&jsk->list); - spin_unlock_bh(&priv->j1939_socks_lock); + write_unlock_bh(&priv->j1939_socks_lock); j1939_priv_put(priv); jsk->state &= ~J1939_SOCK_BOUND; @@ -262,12 +262,17 @@ static bool j1939_sk_match_dst(struct j1939_sock *jsk, static bool j1939_sk_match_filter(struct j1939_sock *jsk, const struct j1939_sk_buff_cb *skcb) { - const struct j1939_filter *f = jsk->filters; - int nfilter = jsk->nfilters; + const struct j1939_filter *f; + int nfilter; + + spin_lock_bh(&jsk->filters_lock); + + f = jsk->filters; + nfilter = jsk->nfilters; if (!nfilter) /* receive all when no filters are assigned */ - return true; + goto filter_match_found; for (; nfilter; ++f, --nfilter) { if ((skcb->addr.pgn & f->pgn_mask) != f->pgn) @@ -276,9 +281,15 @@ static bool j1939_sk_match_filter(struct j1939_sock *jsk, continue; if ((skcb->addr.src_name & f->name_mask) != f->name) continue; - return true; + goto filter_match_found; } + + spin_unlock_bh(&jsk->filters_lock); return false; + +filter_match_found: + spin_unlock_bh(&jsk->filters_lock); + return true; } static bool j1939_sk_recv_match_one(struct j1939_sock *jsk, @@ -329,13 +340,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb) struct j1939_sock *jsk; bool match = false; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { match = j1939_sk_recv_match_one(jsk, skcb); if (match) break; } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); return match; } @@ -344,11 +355,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sock *jsk; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { j1939_sk_recv_one(jsk, skb); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); } static void j1939_sk_sock_destruct(struct sock *sk) @@ -401,6 +412,7 @@ static int j1939_sk_init(struct sock *sk) atomic_set(&jsk->skb_pending, 0); spin_lock_init(&jsk->sk_session_queue_lock); INIT_LIST_HEAD(&jsk->sk_session_queue); + spin_lock_init(&jsk->filters_lock); /* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */ sock_set_flag(sk, SOCK_RCU_FREE); @@ -703,9 +715,11 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, } lock_sock(&jsk->sk); + spin_lock_bh(&jsk->filters_lock); ofilters = jsk->filters; jsk->filters = filters; jsk->nfilters = count; + spin_unlock_bh(&jsk->filters_lock); release_sock(&jsk->sk); kfree(ofilters); return 0; @@ -1080,12 +1094,12 @@ void j1939_sk_errqueue(struct j1939_session *session, } /* spread RX notifications to all sockets subscribed to this session */ - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { if (j1939_sk_recv_match_one(jsk, &session->skcb)) __j1939_sk_errqueue(session, &jsk->sk, type); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); }; void j1939_sk_send_loop_abort(struct sock *sk, int err) @@ -1273,7 +1287,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) struct j1939_sock *jsk; int error_code = ENETDOWN; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { jsk->sk.sk_err = error_code; if (!sock_flag(&jsk->sk, SOCK_DEAD)) @@ -1281,7 +1295,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) j1939_sk_queue_drop_all(priv, jsk, error_code); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); } static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd, diff --git a/net/core/dev.c b/net/core/dev.c index 0a5566b6f8a2..65284eeec7de 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2271,7 +2271,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); again: list_for_each_entry_rcu(ptype, ptype_list, list) { - if (ptype->ignore_outgoing) + if (READ_ONCE(ptype->ignore_outgoing)) continue; /* Never send packets back to the socket @@ -6645,6 +6645,8 @@ static int napi_threaded_poll(void *data) void *have; while (!napi_thread_wait(napi)) { + unsigned long last_qs = jiffies; + for (;;) { bool repoll = false; @@ -6659,6 +6661,7 @@ static int napi_threaded_poll(void *data) if (!repoll) break; + rcu_softirq_qs_periodic(last_qs); cond_resched(); } } @@ -8861,7 +8864,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { - size_t size = sizeof(sa->sa_data); + size_t size = sizeof(sa->sa_data_min); struct net_device *dev; int ret = 0; @@ -11323,6 +11326,7 @@ static struct pernet_operations __net_initdata netdev_net_ops = { static void __net_exit default_device_exit_net(struct net *net) { + struct netdev_name_node *name_node, *tmp; struct net_device *dev, *aux; /* * Push all migratable network devices back to the @@ -11345,6 +11349,14 @@ static void __net_exit default_device_exit_net(struct net *net) snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); if (netdev_name_in_use(&init_net, fb_name)) snprintf(fb_name, IFNAMSIZ, "dev%%d"); + + netdev_for_each_altname_safe(dev, name_node, tmp) + if (netdev_name_in_use(&init_net, name_node->name)) { + netdev_name_node_del(name_node); + synchronize_rcu(); + __netdev_name_node_alt_destroy(name_node); + } + err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { pr_emerg("%s: failed to move %s to init_net: %d\n", diff --git a/net/core/dev.h b/net/core/dev.h index 9ca91457c197..db9ff8cd8d46 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -63,6 +63,9 @@ int dev_change_name(struct net_device *dev, const char *newname); #define netdev_for_each_altname(dev, namenode) \ list_for_each_entry((namenode), &(dev)->name_node->list, list) +#define netdev_for_each_altname_safe(dev, namenode, next) \ + list_for_each_entry_safe((namenode), (next), &(dev)->name_node->list, \ + list) int netdev_name_node_alt_create(struct net_device *dev, const char *name); int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 7674bb9f3076..5cdbfbf9a7dc 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -342,7 +342,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof(ifr->ifr_hwaddr.sa_data), + min(sizeof(ifr->ifr_hwaddr.sa_data_min), (size_t)dev->addr_len)); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return 0; diff --git a/net/core/filter.c b/net/core/filter.c index 3a6110ea4009..cb7c4651eaec 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5752,6 +5752,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; struct fib_table *tb; + if (flags & BPF_FIB_LOOKUP_TBID) { + tbid = params->tbid; + /* zero out for vlan output */ + params->tbid = 0; + } + tb = fib_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; @@ -5803,6 +5809,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.fi->fib_priority; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) + params->ipv4_src = fib_result_prefsrc(net, &res); + /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here */ @@ -5885,6 +5894,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; struct fib6_table *tb; + if (flags & BPF_FIB_LOOKUP_TBID) { + tbid = params->tbid; + /* zero out for vlan output */ + params->tbid = 0; + } + tb = ipv6_stub->fib6_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; @@ -5939,6 +5954,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) { + if (res.f6i->fib6_prefsrc.plen) { + *src = res.f6i->fib6_prefsrc.addr; + } else { + err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, + &fl6.daddr, 0, + src); + if (err) + return BPF_FIB_LKUP_RET_NO_SRC_ADDR; + } + } + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) goto set_fwd_params; @@ -5957,7 +5984,8 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ + BPF_FIB_LOOKUP_SRC) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) diff --git a/net/core/request_sock.c b/net/core/request_sock.c index f35c2e998406..63de5c635842 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -33,9 +33,6 @@ void reqsk_queue_alloc(struct request_sock_queue *queue) { - spin_lock_init(&queue->rskq_lock); - - spin_lock_init(&queue->fastopenq.lock); queue->fastopenq.rskq_rst_head = NULL; queue->fastopenq.rskq_rst_tail = NULL; queue->fastopenq.qlen = 0; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7cf1e42d7f93..ac379e4590f8 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5026,10 +5026,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; - struct nlattr *br_spec, *attr = NULL; + struct nlattr *br_spec, *attr, *br_flags_attr = NULL; int rem, err = -EOPNOTSUPP; u16 flags = 0; - bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -5047,11 +5046,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) { if (nla_len(attr) < sizeof(flags)) return -EINVAL; - have_flags = true; + br_flags_attr = attr; flags = nla_get_u16(attr); } @@ -5095,8 +5094,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, } } - if (have_flags) - memcpy(nla_data(attr), &flags, sizeof(flags)); + if (br_flags_attr) + memcpy(nla_data(br_flags_attr), &flags, sizeof(flags)); out: return err; } diff --git a/net/core/scm.c b/net/core/scm.c index e762a4b8a1d2..a877c4ef4c25 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -105,7 +105,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; /* don't allow io_uring files */ - if (io_uring_get_socket(file)) { + if (io_is_uring_fops(file)) { fput(file); return -EINVAL; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8a819d0a7bfb..e38a4c7449f6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4213,8 +4213,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, /* GSO partial only requires that we trim off any excess that * doesn't fit into an MSS sized block, so take care of that * now. + * Cap len to not accidentally hit GSO_BY_FRAGS. */ - partial_segs = len / mss; + partial_segs = min(len, GSO_BY_FRAGS - 1U) / mss; if (partial_segs > 1) mss *= partial_segs; else @@ -6499,6 +6500,14 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old, for (i = 0; i < sp->len; i++) xfrm_state_hold(sp->xvec[i]); } +#endif +#ifdef CONFIG_MCTP_FLOWS + if (old_active & (1 << SKB_EXT_MCTP)) { + struct mctp_flow *flow = skb_ext_get_ptr(old, SKB_EXT_MCTP); + + if (flow->key) + refcount_inc(&flow->key->refs); + } #endif __skb_ext_put(old); return new; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 3818035ea002..39643f78cf78 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1217,8 +1217,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) - psock->saved_data_ready(sk); + if (psock) { + read_lock_bh(&sk->sk_callback_lock); + sk_psock_data_ready(sk, psock); + read_unlock_bh(&sk->sk_callback_lock); + } rcu_read_unlock(); } } diff --git a/net/core/sock.c b/net/core/sock.c index c50a14a02edd..c8803b95ea0d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -107,6 +107,7 @@ #include #include #include +#include #include #include #include @@ -4109,8 +4110,14 @@ bool sk_busy_loop_end(void *p, unsigned long start_time) { struct sock *sk = p; - return !skb_queue_empty_lockless(&sk->sk_receive_queue) || - sk_busy_loop_timeout(sk, start_time); + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + return true; + + if (sk_is_udp(sk) && + !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue)) + return true; + + return sk_busy_loop_timeout(sk, start_time); } EXPORT_SYMBOL(sk_busy_loop_end); #endif /* CONFIG_NET_RX_BUSY_POLL */ diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index f7cf74cdd3db..e6ea6764d10a 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -190,7 +190,7 @@ int sock_diag_register(const struct sock_diag_handler *hndl) if (sock_diag_handlers[hndl->family]) err = -EBUSY; else - sock_diag_handlers[hndl->family] = hndl; + WRITE_ONCE(sock_diag_handlers[hndl->family], hndl); mutex_unlock(&sock_diag_table_mutex); return err; @@ -206,7 +206,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) mutex_lock(&sock_diag_table_mutex); BUG_ON(sock_diag_handlers[family] != hnld); - sock_diag_handlers[family] = NULL; + WRITE_ONCE(sock_diag_handlers[family], NULL); mutex_unlock(&sock_diag_table_mutex); } EXPORT_SYMBOL_GPL(sock_diag_unregister); @@ -224,7 +224,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX); - if (sock_diag_handlers[req->sdiag_family] == NULL) + if (READ_ONCE(sock_diag_handlers[req->sdiag_family]) == NULL) sock_load_diag_module(req->sdiag_family, 0); mutex_lock(&sock_diag_table_mutex); @@ -283,12 +283,12 @@ static int sock_diag_bind(struct net *net, int group) switch (group) { case SKNLGRP_INET_TCP_DESTROY: case SKNLGRP_INET_UDP_DESTROY: - if (!sock_diag_handlers[AF_INET]) + if (!READ_ONCE(sock_diag_handlers[AF_INET])) sock_load_diag_module(AF_INET, 0); break; case SKNLGRP_INET6_TCP_DESTROY: case SKNLGRP_INET6_UDP_DESTROY: - if (!sock_diag_handlers[AF_INET6]) + if (!READ_ONCE(sock_diag_handlers[AF_INET6])) sock_load_diag_module(AF_INET6, 0); break; } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index b1e86a7265b3..83906d093f0a 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -291,7 +291,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "HSR: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n"); return; } @@ -338,7 +338,7 @@ static void send_prp_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "PRP: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n"); return; } diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 80cdc6f6b34c..0323ab5023c6 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) return false; /* Get next tlv */ - total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length; + total_length += hsr_sup_tag->tlv.HSR_TLV_length; if (!pskb_may_pull(skb, total_length)) return false; skb_pull(skb, total_length); diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 0b0199878095..e44a039e36af 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -235,6 +235,10 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, */ if (ethhdr->h_proto == htons(ETH_P_PRP) || ethhdr->h_proto == htons(ETH_P_HSR)) { + /* Check if skb contains hsr_ethhdr */ + if (skb->mac_len < sizeof(struct hsr_ethhdr)) + return NULL; + /* Use the existing sequence_nr from the tag as starting point * for filtering duplicate frames. */ diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index b099c3150150..257b50124cee 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -148,14 +148,21 @@ static struct notifier_block hsr_nb = { static int __init hsr_init(void) { - int res; + int err; BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_HLEN); - register_netdevice_notifier(&hsr_nb); - res = hsr_netlink_init(); + err = register_netdevice_notifier(&hsr_nb); + if (err) + return err; - return res; + err = hsr_netlink_init(); + if (err) { + unregister_netdevice_notifier(&hsr_nb); + return err; + } + + return 0; } static void __exit hsr_exit(void) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c13b8ed63f87..9408dc3bb42d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -324,6 +324,9 @@ lookup_protocol: if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; + if (INET_PROTOSW_ICSK & answer_flags) + inet_init_csk_locks(sk); + inet = inet_sk(sk); inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; @@ -1634,10 +1637,12 @@ EXPORT_SYMBOL(inet_current_timestamp); int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { - if (sk->sk_family == AF_INET) + unsigned int family = READ_ONCE(sk->sk_family); + + if (family == AF_INET) return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) + if (family == AF_INET6) return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); #endif return -EINVAL; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9456f5bb35e5..ccff96820a70 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) if (neigh) { if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + memcpy(r->arp_ha.sa_data, neigh->ha, + min(dev->addr_len, (unsigned char)sizeof(r->arp_ha.sa_data_min))); r->arp_flags = arp_state_to_flags(neigh); read_unlock_bh(&neigh->lock); r->arp_ha.sa_family = dev->type; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 35d6e74be840..bb0d1252cad8 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1804,6 +1804,21 @@ done: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv4.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; @@ -1855,8 +1870,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &tgt_net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^ - tgt_net->dev_base_seq; + cb->seq = inet_base_seq(tgt_net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -2257,8 +2271,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 80ce0112e24b..79fa19a36bbd 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -727,6 +727,10 @@ out: } if (req) reqsk_put(req); + + if (newsk) + inet_init_csk_locks(newsk); + return newsk; out_err: newsk = NULL; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index f7426926a104..8f690a6e61ba 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -57,7 +57,7 @@ static const struct inet_diag_handler *inet_diag_lock_handler(int proto) return ERR_PTR(-ENOENT); } - if (!inet_diag_table[proto]) + if (!READ_ONCE(inet_diag_table[proto])) sock_load_diag_module(AF_INET, proto); mutex_lock(&inet_diag_table_mutex); @@ -1419,7 +1419,7 @@ int inet_diag_register(const struct inet_diag_handler *h) mutex_lock(&inet_diag_table_mutex); err = -EEXIST; if (!inet_diag_table[type]) { - inet_diag_table[type] = h; + WRITE_ONCE(inet_diag_table[type], h); err = 0; } mutex_unlock(&inet_diag_table_mutex); @@ -1436,7 +1436,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h) return; mutex_lock(&inet_diag_table_mutex); - inet_diag_table[type] = NULL; + WRITE_ONCE(inet_diag_table[type], NULL); mutex_unlock(&inet_diag_table_mutex); } EXPORT_SYMBOL_GPL(inet_diag_unregister); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index f2ed2aed08ab..0ad25e6783ac 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1111,10 +1111,33 @@ ok: return 0; error: + if (sk_hashed(sk)) { + spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash); + + sock_prot_inuse_add(net, sk->sk_prot, -1); + + spin_lock(lock); + __sk_nulls_del_node_init_rcu(sk); + spin_unlock(lock); + + sk->sk_hash = 0; + inet_sk(sk)->inet_sport = 0; + inet_sk(sk)->inet_num = 0; + + if (tw) + inet_twsk_bind_unhash(tw, hinfo); + } + spin_unlock(&head2->lock); if (tb_created) inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); - spin_unlock_bh(&head->lock); + spin_unlock(&head->lock); + + if (tw) + inet_twsk_deschedule_put(tw); + + local_bh_enable(); + return -ENOMEM; } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1d77d992e6e7..340a8f0c2980 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -281,12 +281,12 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) } EXPORT_SYMBOL_GPL(__inet_twsk_schedule); +/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */ void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) { - struct inet_timewait_sock *tw; - struct sock *sk; struct hlist_nulls_node *node; unsigned int slot; + struct sock *sk; for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; @@ -295,38 +295,35 @@ restart_rcu: rcu_read_lock(); restart: sk_nulls_for_each_rcu(sk, node, &head->chain) { - if (sk->sk_state != TCP_TIME_WAIT) { - /* A kernel listener socket might not hold refcnt for net, - * so reqsk_timer_handler() could be fired after net is - * freed. Userspace listener and reqsk never exist here. - */ - if (unlikely(sk->sk_state == TCP_NEW_SYN_RECV && - hashinfo->pernet)) { - struct request_sock *req = inet_reqsk(sk); + int state = inet_sk_state_load(sk); - inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req); - } - - continue; - } - - tw = inet_twsk(sk); - if ((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->ns.count)) + if ((1 << state) & ~(TCPF_TIME_WAIT | + TCPF_NEW_SYN_RECV)) continue; - if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) + if (sk->sk_family != family || + refcount_read(&sock_net(sk)->ns.count)) continue; - if (unlikely((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->ns.count))) { - inet_twsk_put(tw); + if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) + continue; + + if (unlikely(sk->sk_family != family || + refcount_read(&sock_net(sk)->ns.count))) { + sock_gen_put(sk); goto restart; } rcu_read_unlock(); local_bh_disable(); - inet_twsk_deschedule_put(tw); + if (state == TCP_TIME_WAIT) { + inet_twsk_deschedule_put(inet_twsk(sk)); + } else { + struct request_sock *req = inet_reqsk(sk); + + inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, + req); + } local_bh_enable(); goto restart_rcu; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e19ef88ae181..a6d460aaee79 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1268,6 +1268,12 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, if (unlikely(!rt)) return -EFAULT; + cork->fragsize = ip_sk_use_pmtu(sk) ? + dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); + + if (!inetdev_valid_mtu(cork->fragsize)) + return -ENETUNREACH; + /* * setup for corking. */ @@ -1284,12 +1290,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->addr = ipc->addr; } - cork->fragsize = ip_sk_use_pmtu(sk) ? - dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); - - if (!inetdev_valid_mtu(cork->fragsize)) - return -ENETUNREACH; - cork->gso_size = ipc->gso_size; cork->dst = &rt->dst; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c1fb7580ea58..a00ba2d51e1b 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1406,12 +1406,13 @@ e_inval: * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer + * @drop_dst: if true, drops skb dst * * To support IP_CMSG_PKTINFO option, we store rt_iif and specific * destination in skb->cb[] before dst drop. * This way, receiver doesn't make cache line misses to read rtable. */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) || @@ -1440,7 +1441,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + if (drop_dst) + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 24961b304dad..3445e576b05b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -364,7 +364,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, bool log_ecn_error) { const struct iphdr *iph = ip_hdr(skb); - int err; + int nh, err; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { @@ -390,8 +390,21 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(tunnel->dev, rx_length_errors); + DEV_STATS_INC(tunnel->dev, rx_errors); + goto drop; + } + iph = (struct iphdr *)(skb->head + nh); + err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -540,6 +553,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, return 0; } +static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom) +{ + /* we must cap headroom to some upperlimit, else pskb_expand_head + * will overflow header offsets in skb_headers_offset_update(). + */ + static const unsigned int max_allowed = 512; + + if (headroom > max_allowed) + headroom = max_allowed; + + if (headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, headroom); +} + void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto, int tunnel_hlen) { @@ -614,13 +641,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; - if (headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, headroom); - - if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + if (skb_cow_head(skb, headroom)) { ip_rt_put(rt); goto tx_dropped; } + + ip_tunnel_adj_headroom(dev, headroom); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); return; @@ -800,16 +827,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); - if (max_headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, max_headroom); - if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + if (skb_cow_head(skb, max_headroom)) { ip_rt_put(rt); dev->stats.tx_dropped++; kfree_skb(skb); return; } + ip_tunnel_adj_headroom(dev, max_headroom); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); return; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 586b1b3e35b8..80ccd6661aa3 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) }; skb_reset_network_header(skb); - csum = csum_partial(icmp6h, len, 0); + csum = skb_checksum(skb, skb_transport_offset(skb), len, 0); icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len, IPPROTO_ICMPV6, csum); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b807197475a5..3ed9ed2bffd2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1073,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt, msg = (struct igmpmsg *)skb_network_header(skb); msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; - ipv4_pktinfo_prepare(mroute_sk, pkt); + ipv4_pktinfo_prepare(mroute_sk, pkt, false); memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); /* Add our header */ igmp = skb_put(skb, sizeof(struct igmphdr)); @@ -1581,9 +1581,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, if (copy_from_sockptr(&olr, optlen, sizeof(int))) return -EFAULT; - olr = min_t(unsigned int, olr, sizeof(int)); if (olr < 0) return -EINVAL; + + olr = min_t(unsigned int, olr, sizeof(int)); + if (copy_to_sockptr(optlen, &olr, sizeof(int))) return -EFAULT; if (copy_to_sockptr(optval, &val, olr)) diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 4073762996e2..fc761915c5f6 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -279,6 +279,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, goto free_nskb; nf_ct_attach(nskb, oldskb); + nf_ct_set_closing(skb_nfct(oldskb)); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) /* If we use ip_local_out for bridged traffic, the MAC source on diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 639aa5abda9d..ee0efd0efec4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -286,11 +286,13 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) { + enum skb_drop_reason reason; + /* Charge it to the socket. */ - ipv4_pktinfo_prepare(sk, skb); - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); + ipv4_pktinfo_prepare(sk, skb, true); + if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { + kfree_skb_reason(skb, reason); return NET_RX_DROP; } @@ -301,7 +303,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) { if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { atomic_inc(&sk->sk_drops); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY); return NET_RX_DROP; } nf_reset_ct(skb); @@ -346,6 +348,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, goto error; skb_reserve(skb, hlen); + skb->protocol = htons(ETH_P_IP); skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0b7844a8d571..5a165e29f7be 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -718,6 +718,7 @@ void tcp_push(struct sock *sk, int flags, int mss_now, if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); + smp_mb__after_atomic(); } /* It is possible TX completion already happened * before we set TSQ_THROTTLED. @@ -1933,7 +1934,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, static bool can_map_frag(const skb_frag_t *frag) { - return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag); + struct page *page; + + if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag)) + return false; + + page = skb_frag_page(frag); + + if (PageCompound(page) || page->mapping) + return false; + + return true; } static int find_next_mappable_frag(const skb_frag_t *frag, @@ -4091,11 +4102,11 @@ int do_tcp_getsockopt(struct sock *sk, int level, if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + switch (optname) { case TCP_MAXSEG: val = tp->mss_cache; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 42844d20da02..b3bfa1a09df6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -357,10 +357,6 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family) /* Even if tw_refcount == 1, we must clean up kernel reqsk */ inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family); } else if (!purged_once) { - /* The last refcount is decremented in tcp_sk_exit_batch() */ - if (refcount_read(&net->ipv4.tcp_death_row.tw_refcount) == 1) - continue; - inet_twsk_purge(&tcp_hashinfo, family); purged_once = true; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 11c0e1c66642..7856b7a3e0ee 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2196,7 +2196,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_csum_pull_header(skb); - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_prepare(sk, skb, true); return __udp_queue_rcv_skb(sk, skb); csum_error: @@ -2790,11 +2790,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + switch (optname) { case UDP_CORK: val = udp_test_bit(CORK, sk); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b8dc20fe7a4e..1648373692a9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -706,6 +706,22 @@ errout: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet6_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv6.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + + static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { @@ -739,8 +755,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet6_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -5326,7 +5341,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq; + cb->seq = inet6_base_seq(tgt_net); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &tgt_net->dev_index_head[h]; @@ -5458,9 +5473,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, } addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); - if (!addr) - return -EINVAL; - + if (!addr) { + err = -EINVAL; + goto errout; + } ifm = nlmsg_data(nlh); if (ifm->ifa_index) dev = dev_get_by_index(tgt_net, ifm->ifa_index); diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 507a8353a6bd..c008d21925d7 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -220,19 +220,26 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { EXPORT_SYMBOL_GPL(ipv6_stub); /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ -const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8) + = IN6ADDR_LOOPBACK_INIT; EXPORT_SYMBOL(in6addr_loopback); -const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; +const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8) + = IN6ADDR_ANY_INIT; EXPORT_SYMBOL(in6addr_any); -const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_linklocal_allnodes); -const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_linklocal_allrouters); -const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allnodes); -const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allrouters); -const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_sitelocal_allrouters); static void snmp6_free_dev(struct inet6_dev *idev) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a2f29ca51600..62247621cea5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -199,6 +199,9 @@ lookup_protocol: if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; + if (INET_PROTOSW_ICSK & answer_flags) + inet_init_csk_locks(sk); + inet = inet_sk(sk); inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; @@ -1074,6 +1077,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, + .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5fa0e37305d9..1cfdd9d95012 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -180,6 +180,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_IOAM: if (!ipv6_hop_ioam(skb, off)) return false; + + nh = skb_network_header(skb); break; case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) @@ -974,6 +976,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb); + /* About to mangle packet header */ + if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) + goto drop; + + /* Trace pointer may have changed */ + trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) + + optoff + sizeof(*hdr)); + ioam6_fill_trace_data(skb, ns, trace, true); break; default: diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 7c2003833010..be52b18e08a6 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -449,6 +449,11 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(16); /* src */ } +static void fib6_rule_flush_cache(struct fib_rules_ops *ops) +{ + rt_genid_bump_ipv6(ops->fro_net); +} + static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), @@ -461,6 +466,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .compare = fib6_rule_compare, .fill = fib6_rule_fill, .nlmsg_payload = fib6_rule_nlmsg_payload, + .flush_cache = fib6_rule_flush_cache, .nlgroup = RTNLGRP_IPV6_RULE, .owner = THIS_MODULE, .fro_net = &init_net, diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9125e92d9917..2699915bb85b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -796,8 +796,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, struct sk_buff *skb), bool log_ecn_err) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - int err; + const struct ipv6hdr *ipv6h; + int nh, err; if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) || @@ -829,7 +829,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, goto drop; } - ipv6h = ipv6_hdr(skb); skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { @@ -837,7 +836,23 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_reset_mac_header(skb); } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(tunnel->dev, rx_length_errors); + DEV_STATS_INC(tunnel->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + ipv6h = (struct ipv6hdr *)(skb->head + nh); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 566f3b7b957e..a77769538940 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2722,7 +2722,6 @@ void ipv6_mc_down(struct inet6_dev *idev) /* Should stop work after group drop. or we will * start work again in mld_ifc_event() */ - synchronize_net(); mld_query_stop_work(idev); mld_report_stop_work(idev); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 433d98bbe33f..71d692728230 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -344,6 +344,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); nf_ct_attach(nskb, oldskb); + nf_ct_set_closing(skb_nfct(oldskb)); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) /* If we use ip6_local_out for bridged traffic, the MAC source on diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7f65dc750feb..887599d351b8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5335,19 +5335,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { err = __ip6_ins_rt(nh->fib6_info, info, extack); - fib6_info_release(nh->fib6_info); - if (!err) { - /* save reference to last route successfully inserted */ - rt_last = nh->fib6_info; - - /* save reference to first route for notification */ - if (!rt_notif) - rt_notif = nh->fib6_info; - } - - /* nh->fib6_info is used or freed at this point, reset to NULL*/ - nh->fib6_info = NULL; if (err) { if (replace && nhn) NL_SET_ERR_MSG_MOD(extack, @@ -5355,6 +5343,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = nh; goto add_errout; } + /* save reference to last route successfully inserted */ + rt_last = nh->fib6_info; + + /* save reference to first route for notification */ + if (!rt_notif) + rt_notif = nh->fib6_info; /* Because each route is added like a single route we remove * these flags after the first nexthop: if there is a collision, @@ -5415,8 +5409,7 @@ add_errout: cleanup: list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { - if (nh->fib6_info) - fib6_info_release(nh->fib6_info); + fib6_info_release(nh->fib6_info); list_del(&nh->next); kfree(nh); } diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 29346a6eec9f..35508abd76f4 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -512,22 +512,24 @@ int __init seg6_init(void) { int err; - err = genl_register_family(&seg6_genl_family); + err = register_pernet_subsys(&ip6_segments_ops); if (err) goto out; - err = register_pernet_subsys(&ip6_segments_ops); + err = genl_register_family(&seg6_genl_family); if (err) - goto out_unregister_genl; + goto out_unregister_pernet; #ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) - goto out_unregister_pernet; + goto out_unregister_genl; err = seg6_local_init(); - if (err) - goto out_unregister_pernet; + if (err) { + seg6_iptunnel_exit(); + goto out_unregister_genl; + } #endif #ifdef CONFIG_IPV6_SEG6_HMAC @@ -548,11 +550,11 @@ out_unregister_iptun: #endif #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL -out_unregister_pernet: - unregister_pernet_subsys(&ip6_segments_ops); -#endif out_unregister_genl: genl_unregister_family(&seg6_genl_family); +#endif +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); goto out; } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index fc3fddeb6f36..f66b5f74cd83 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -156,7 +156,7 @@ static char iucv_error_pathid[16] = "INVALID PATHID"; static LIST_HEAD(iucv_handler_list); /* - * iucv_path_table: an array of iucv_path structures. + * iucv_path_table: array of pointers to iucv_path structures. */ static struct iucv_path **iucv_path_table; static unsigned long iucv_max_pathid; @@ -544,7 +544,7 @@ static int iucv_enable(void) cpus_read_lock(); rc = -ENOMEM; - alloc_size = iucv_max_pathid * sizeof(struct iucv_path); + alloc_size = iucv_max_pathid * sizeof(*iucv_path_table); iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); if (!iucv_path_table) goto out; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 65845c59c065..7d37bf4334d2 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1274,10 +1274,11 @@ static int kcm_getsockopt(struct socket *sock, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + switch (optname) { case KCM_RECV_DISABLE: val = kcm->rx_disabled; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 314ec3a51e8d..bb92dc8b82f3 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -630,7 +630,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) back_from_confirm: lock_sock(sk); - ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; + ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0); err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, &fl6, (struct rt6_info *)dst, diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index f011af6601c9..6146e4e67bbb 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1356,11 +1356,11 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + err = -ENOTCONN; if (!sk->sk_user_data) goto end; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 9ffbc667be6c..8e3be0009f60 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -226,6 +226,8 @@ static int llc_ui_release(struct socket *sock) } netdev_put(llc->dev, &llc->dev_tracker); sock_put(sk); + sock_orphan(sk); + sock->sk = NULL; llc_sk_free(sk); out: return 0; @@ -928,14 +930,15 @@ copy_uaddr: */ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { + DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; + int rc = -EINVAL, copied = 0, hdrlen, hh_len; struct sk_buff *skb = NULL; + struct net_device *dev; size_t size = 0; - int rc = -EINVAL, copied = 0, hdrlen; dprintk("%s: sending from %02X to %02X\n", __func__, llc->laddr.lsap, llc->daddr.lsap); @@ -955,22 +958,29 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (rc) goto out; } - hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr); + dev = llc->dev; + hh_len = LL_RESERVED_SPACE(dev); + hdrlen = llc_ui_header_len(sk, addr); size = hdrlen + len; - if (size > llc->dev->mtu) - size = llc->dev->mtu; + size = min_t(size_t, size, READ_ONCE(dev->mtu)); copied = size - hdrlen; rc = -EINVAL; if (copied < 0) goto out; release_sock(sk); - skb = sock_alloc_send_skb(sk, size, noblock, &rc); + skb = sock_alloc_send_skb(sk, hh_len + size, noblock, &rc); lock_sock(sk); if (!skb) goto out; - skb->dev = llc->dev; + if (sock_flag(sk, SOCK_ZAPPED) || + llc->dev != dev || + hdrlen != llc_ui_header_len(sk, addr) || + hh_len != LL_RESERVED_SPACE(dev) || + size > READ_ONCE(dev->mtu)) + goto out; + skb->dev = dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); - skb_reserve(skb, hdrlen); + skb_reserve(skb, hh_len + hdrlen); rc = memcpy_from_msg(skb_put(skb, copied), msg, copied); if (rc) goto out; diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 6e387aadffce..4f16d9c88350 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -135,22 +135,15 @@ static struct packet_type llc_packet_type __read_mostly = { .func = llc_rcv, }; -static struct packet_type llc_tr_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_TR_802_2), - .func = llc_rcv, -}; - static int __init llc_init(void) { dev_add_pack(&llc_packet_type); - dev_add_pack(&llc_tr_packet_type); return 0; } static void __exit llc_exit(void) { dev_remove_pack(&llc_packet_type); - dev_remove_pack(&llc_tr_packet_type); } module_init(llc_init); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a2c4866080bd..1e57027da291 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1775,6 +1775,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, sband->band); } + ieee80211_sta_set_rx_nss(link_sta); + return ret; } @@ -2073,15 +2075,14 @@ static int ieee80211_change_station(struct wiphy *wiphy, } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - sta->sdata->u.vlan.sta) { - ieee80211_clear_fast_rx(sta); + sta->sdata->u.vlan.sta) RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); - } if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ieee80211_vif_dec_num_mcast(sta->sdata); sta->sdata = vlansdata; + ieee80211_check_fast_rx(sta); ieee80211_check_fast_xmit(sta); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c07645c999f9..f25dc6931a5b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7221,8 +7221,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); beacon_ies = rcu_dereference(req->bss->beacon_ies); - - if (beacon_ies) { + if (!beacon_ies) { /* * Wait up to one beacon interval ... * should this be more if we miss one? @@ -7295,6 +7294,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code, false); + drv_mgd_complete_tx(sdata->local, sdata, &info); return 0; } diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index d5ea5f5bcf3a..9d33fd2377c8 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -119,7 +119,8 @@ void rate_control_rate_update(struct ieee80211_local *local, rcu_read_unlock(); } - drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); + if (sta->uploaded) + drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); } int ieee80211_rate_control_register(const struct rate_control_ops *ops) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 49b71453dec3..bd56015b2925 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -396,7 +396,10 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) int i; for (i = 0; i < ARRAY_SIZE(sta->link); i++) { - if (!(sta->sta.valid_links & BIT(i))) + struct link_sta_info *link_sta; + + link_sta = rcu_access_pointer(sta->link[i]); + if (!link_sta) continue; sta_remove_link(sta, i, false); @@ -888,6 +891,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); + ieee80211_check_fast_xmit(sta); + return 0; out_remove: if (sta->sta.valid_links) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2db103a56a28..3d62e8b71874 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * Transmit and frame generation functions. */ @@ -3044,7 +3044,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) sdata->vif.type == NL80211_IFTYPE_STATION) goto out; - if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded) goto out; if (test_sta_flag(sta, WLAN_STA_PS_STA) || @@ -3838,6 +3838,7 @@ begin: goto begin; skb = __skb_dequeue(&tx.skbs); + info = IEEE80211_SKB_CB(skb); if (!skb_queue_empty(&tx.skbs)) { spin_lock_bh(&fq->lock); @@ -3882,7 +3883,7 @@ begin: } encap_out: - IEEE80211_SKB_CB(skb)->control.vif = vif; + info->control.vif = vif; if (tx.sta && wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 55550ead2ced..a4cc9d077c59 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -265,19 +265,27 @@ fail: return -ENOMEM; } +static void mac802154_llsec_key_del_rcu(struct rcu_head *rcu) +{ + struct ieee802154_llsec_key_entry *pos; + struct mac802154_llsec_key *mkey; + + pos = container_of(rcu, struct ieee802154_llsec_key_entry, rcu); + mkey = container_of(pos->key, struct mac802154_llsec_key, key); + + llsec_key_put(mkey); + kfree_sensitive(pos); +} + int mac802154_llsec_key_del(struct mac802154_llsec *sec, const struct ieee802154_llsec_key_id *key) { struct ieee802154_llsec_key_entry *pos; list_for_each_entry(pos, &sec->table.keys, list) { - struct mac802154_llsec_key *mkey; - - mkey = container_of(pos->key, struct mac802154_llsec_key, key); - if (llsec_key_id_equal(&pos->id, key)) { list_del_rcu(&pos->list); - llsec_key_put(mkey); + call_rcu(&pos->rcu, mac802154_llsec_key_del_rcu); return 0; } } diff --git a/net/mctp/route.c b/net/mctp/route.c index 68be8f2b622d..05ab4fddc82e 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -663,7 +663,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, spin_unlock_irqrestore(&mns->keys_lock, flags); if (!tagbits) { - kfree(key); + mctp_key_unref(key); return ERR_PTR(-EBUSY); } @@ -843,6 +843,9 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, /* copy message payload */ skb_copy_bits(skb, pos, skb_transport_header(skb2), size); + /* we need to copy the extensions, for MCTP flow data */ + skb_ext_copy(skb2, skb); + /* do route */ rc = rt->output(rt, skb2); if (rc) @@ -888,7 +891,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex); if (!dev) { rcu_read_unlock(); - return rc; + goto out_free; } rt->dev = __mctp_dev_get(dev); rcu_read_unlock(); @@ -903,7 +906,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rt->mtu = 0; } else { - return -EINVAL; + rc = -EINVAL; + goto out_free; } spin_lock_irqsave(&rt->dev->addrs_lock, flags); @@ -966,12 +970,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rc = mctp_do_fragment_route(rt, skb, mtu, tag); } + /* route output functions consume the skb, even on error */ + skb = NULL; + out_release: if (!ext_rt) mctp_route_release(rt); mctp_dev_put(tmp_rt.dev); +out_free: + kfree_skb(skb); return rc; } diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index a536586742f2..7017dd60659d 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -13,17 +13,22 @@ #include #include "protocol.h" -static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) +static int subflow_get_info(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *sf; struct nlattr *start; u32 flags = 0; + bool slow; int err; + if (inet_sk_state_load(sk) == TCP_LISTEN) + return 0; + start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; + slow = lock_sock_fast(sk); rcu_read_lock(); sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data); if (!sf) { @@ -63,17 +68,19 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) sf->map_data_len) || nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) || nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) || - nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) { + nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) { err = -EMSGSIZE; goto nla_failure; } rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_end(skb, start); return 0; nla_failure: rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_cancel(skb, start); return err; } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 980050f6b456..3328870b0c1f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -407,23 +407,12 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) } } -static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr, - const struct mptcp_addr_info *addr) -{ - int i; - - for (i = 0; i < nr; i++) { - if (addrs[i].id == addr->id) - return true; - } - - return false; -} - /* Fill all the remote addresses into the array addrs[], * and return the array size. */ -static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh, +static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, + struct mptcp_addr_info *local, + bool fullmesh, struct mptcp_addr_info *addrs) { bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); @@ -446,15 +435,28 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm msk->pm.subflows++; addrs[i++] = remote; } else { + DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + + /* Forbid creation of new subflows matching existing + * ones, possibly already created by incoming ADD_ADDR + */ + bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + mptcp_for_each_subflow(msk, subflow) + if (READ_ONCE(subflow->local_id) == local->id) + __set_bit(subflow->remote_id, unavail_id); + mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); - addrs[i].id = subflow->remote_id; + addrs[i].id = READ_ONCE(subflow->remote_id); if (deny_id0 && !addrs[i].id) continue; - if (!lookup_address_in_vec(addrs, i, &addrs[i]) && - msk->pm.subflows < subflows_max) { + if (msk->pm.subflows < subflows_max) { + /* forbid creating multiple address towards + * this id + */ + __set_bit(addrs[i].id, unavail_id); msk->pm.subflows++; i++; } @@ -603,7 +605,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); msk->pm.local_addr_used++; - nr = fill_remote_addresses_vec(msk, fullmesh, addrs); + nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs); if (nr) __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); spin_unlock_bh(&msk->pm.lock); @@ -798,18 +800,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + u8 remote_id = READ_ONCE(subflow->remote_id); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - u8 id = subflow->local_id; + u8 id = subflow_get_local_id(subflow); - if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) + if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) continue; if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) continue; pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_id, subflow->local_id, subflow->remote_id, - msk->mpc_endpoint_id); + i, rm_id, id, remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); @@ -900,7 +902,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) } static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; unsigned int addr_max; @@ -942,7 +945,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, } } - if (!entry->addr.id) { + if (!entry->addr.id && needs_id) { find_next: entry->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, @@ -953,7 +956,7 @@ find_next: } } - if (!entry->addr.id) + if (!entry->addr.id && needs_id) goto out; __set_bit(entry->addr.id, pernet->id_bitmap); @@ -1095,7 +1098,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) entry->ifindex = 0; entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); if (ret < 0) kfree(entry); @@ -1311,6 +1314,18 @@ next: return 0; } +static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, + struct genl_info *info) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + + if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, + mptcp_pm_addr_policy, info->extack) && + tb[MPTCP_PM_ADDR_ATTR_ID]) + return true; + return false; +} + static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; @@ -1352,7 +1367,8 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) goto out_free; } } - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, + !mptcp_pm_has_addr_attr_id(attr, info)); if (ret < 0) { GENL_SET_ERR_MSG(info, "too many addresses or duplicate one"); goto out_free; @@ -2014,7 +2030,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) if (WARN_ON_ONCE(!sf)) return -EINVAL; - if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id)) + if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf))) return -EMSGSIZE; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 38cbdc66d8bf..414ed70e7ba2 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -25,8 +25,9 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk) } } -int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry) +static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry, + bool needs_id) { DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_pm_addr_entry *match = NULL; @@ -41,7 +42,7 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true); - if (addr_match && entry->addr.id == 0) + if (addr_match && entry->addr.id == 0 && needs_id) entry->addr.id = e->addr.id; id_match = (e->addr.id == entry->addr.id); if (addr_match && id_match) { @@ -64,7 +65,7 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, } *e = *entry; - if (!e->addr.id) + if (!e->addr.id && needs_id) e->addr.id = find_next_zero_bit(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); @@ -132,10 +133,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { - struct mptcp_pm_addr_entry new_entry; + struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry; __be16 msk_sport = ((struct inet_sock *) inet_sk((struct sock *)msk))->inet_sport; + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&e->addr, skc, false)) { + entry = e; + break; + } + } + spin_unlock_bh(&msk->pm.lock); + if (entry) + return entry->addr.id; + memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry)); new_entry.addr = *skc; new_entry.addr.id = 0; @@ -144,7 +156,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, if (new_entry.addr.port == msk_sport) new_entry.addr.port = 0; - return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry); + return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); } int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) @@ -186,7 +198,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) goto announce_err; } - err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val); + err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto announce_err; @@ -210,6 +222,40 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) return err; } +static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, + struct genl_info *info) +{ + struct mptcp_rm_list list = { .nr = 0 }; + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + bool has_id_0 = false; + int err = -EINVAL; + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + if (READ_ONCE(subflow->local_id) == 0) { + has_id_0 = true; + break; + } + } + if (!has_id_0) { + GENL_SET_ERR_MSG(info, "address with id 0 not found"); + goto remove_err; + } + + list.ids[list.nr++] = 0; + + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &list); + spin_unlock_bh(&msk->pm.lock); + + err = 0; + +remove_err: + release_sock(sk); + return err; +} + int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; @@ -241,6 +287,11 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) goto remove_err; } + if (id_val == 0) { + err = mptcp_userspace_pm_remove_id_zero_address(msk, info); + goto remove_err; + } + lock_sock((struct sock *)msk); list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { @@ -324,7 +375,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) } local.addr = addr_l; - err = mptcp_userspace_pm_append_new_local_addr(msk, &local); + err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto create_err; @@ -438,6 +489,16 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) goto destroy_err; } +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) { + ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6); + addr_l.family = AF_INET6; + } + if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) { + ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6); + addr_r.family = AF_INET6; + } +#endif if (addr_l.family != addr_r.family) { GENL_SET_ERR_MSG(info, "address families do not match"); err = -EINVAL; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 76539d1004eb..3bc21581486a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -119,7 +119,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) subflow->request_mptcp = 1; /* This is the first subflow, always with id 0 */ - subflow->local_id_valid = 1; + WRITE_ONCE(subflow->local_id, 0); mptcp_sock_graft(msk->first, sk->sk_socket); return 0; @@ -1319,6 +1319,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, mpext = skb_ext_find(skb, SKB_EXT_MPTCP); if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) { TCP_SKB_CB(skb)->eor = 1; + tcp_mark_push(tcp_sk(ssk), skb); goto alloc_skb; } @@ -1582,8 +1583,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, void mptcp_check_and_set_pending(struct sock *sk) { - if (mptcp_send_head(sk)) - mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING); + if (mptcp_send_head(sk)) { + mptcp_data_lock(sk); + mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING); + mptcp_data_unlock(sk); + } } void __mptcp_push_pending(struct sock *sk, unsigned int flags) @@ -2336,9 +2340,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) if (__mptcp_check_fallback(mptcp_sk(sk))) return false; - if (tcp_rtx_and_write_queues_empty(sk)) - return false; - /* the closing socket has some data untransmitted and/or unacked: * some data in the mptcp rtx queue has not really xmitted yet. * keep it simple and re-inject the whole mptcp level rtx queue @@ -2422,7 +2423,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, goto out_release; } - dispose_it = !msk->subflow || ssk != msk->subflow->sk; + dispose_it = msk->free_first || ssk != msk->first; if (dispose_it) list_del(&subflow->node); @@ -2440,7 +2441,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { __mptcp_subflow_disconnect(ssk, subflow, flags); - msk->subflow->state = SS_UNCONNECTED; + if (msk->subflow && ssk == msk->subflow->sk) + msk->subflow->state = SS_UNCONNECTED; release_sock(ssk); goto out; @@ -3144,7 +3146,6 @@ static int mptcp_disconnect(struct sock *sk, int flags) msk->last_snd = NULL; WRITE_ONCE(msk->flags, 0); msk->cb_flags = 0; - msk->push_pending = 0; msk->recovery = false; msk->can_ack = false; msk->fully_established = false; @@ -3168,8 +3169,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } + +static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk) +{ + const struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt; + struct ipv6_pinfo *newnp; + + newnp = inet6_sk(newsk); + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + if (!opt) + net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__); + } + RCU_INIT_POINTER(newnp->opt, opt); + rcu_read_unlock(); +} #endif +static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk) +{ + struct ip_options_rcu *inet_opt, *newopt = NULL; + const struct inet_sock *inet = inet_sk(sk); + struct inet_sock *newinet; + + newinet = inet_sk(newsk); + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt) { + newopt = sock_kmalloc(newsk, sizeof(*inet_opt) + + inet_opt->opt.optlen, GFP_ATOMIC); + if (newopt) + memcpy(newopt, inet_opt, sizeof(*inet_opt) + + inet_opt->opt.optlen); + else + net_warn_ratelimited("%s: Failed to copy ip options\n", __func__); + } + RCU_INIT_POINTER(newinet->inet_opt, newopt); + rcu_read_unlock(); +} + struct sock *mptcp_sk_clone_init(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct sock *ssk, @@ -3190,6 +3233,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, __mptcp_init_sock(nsk); +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (nsk->sk_family == AF_INET6) + mptcp_copy_ip6_options(nsk, sk); + else +#endif + mptcp_copy_ip_options(nsk, sk); + msk = mptcp_sk(nsk); msk->local_key = subflow_req->local_key; msk->token = subflow_req->token; @@ -3202,7 +3252,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, msk->write_seq = subflow_req->idsn + 1; msk->snd_nxt = msk->write_seq; msk->snd_una = msk->write_seq; - msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; + msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) { @@ -3341,10 +3391,10 @@ static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - /* clears msk->subflow, allowing the following to close - * even the initial subflow - */ mptcp_dispose_initial_subflow(msk); + + /* allow the following to close even the initial subflow */ + msk->free_first = 1; mptcp_destroy_common(msk, 0); sk_sockets_allocated_dec(sk); } @@ -3388,8 +3438,7 @@ static void mptcp_release_cb(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); for (;;) { - unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) | - msk->push_pending; + unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED); struct list_head join_list; if (!flags) @@ -3405,7 +3454,6 @@ static void mptcp_release_cb(struct sock *sk) * datapath acquires the msk socket spinlock while helding * the subflow socket lock */ - msk->push_pending = 0; msk->cb_flags &= ~flags; spin_unlock_bh(&sk->sk_lock.slock); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 4ec8e0a81b5a..2bc37773e780 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -272,7 +272,6 @@ struct mptcp_sock { int rmem_released; unsigned long flags; unsigned long cb_flags; - unsigned long push_pending; bool recovery; /* closing subflow write queue reinjected */ bool can_ack; bool fully_established; @@ -287,7 +286,8 @@ struct mptcp_sock { cork:1, nodelay:1, fastopening:1, - in_accept_queue:1; + in_accept_queue:1, + free_first:1; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; @@ -475,7 +475,6 @@ struct mptcp_subflow_context { can_ack : 1, /* only after processing the remote a key */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ - local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1; /* at least one csum validated */ enum mptcp_data_avail data_avail; u32 remote_nonce; @@ -483,7 +482,7 @@ struct mptcp_subflow_context { u32 local_nonce; u32 remote_token; u8 hmac[MPTCPOPT_HMAC_LEN]; - u8 local_id; + s16 local_id; /* if negative not initialized yet */ u8 remote_id; u8 reset_seen:1; u8 reset_transient:1; @@ -529,6 +528,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; + WRITE_ONCE(subflow->local_id, -1); } static inline u64 @@ -834,8 +834,6 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list); -int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry); void mptcp_free_local_addr_list(struct mptcp_sock *msk); int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info); int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info); @@ -911,6 +909,15 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) +{ + int local_id = READ_ONCE(subflow->local_id); + + if (local_id < 0) + return 0; + return local_id; +} + void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 45d20e20cfc0..891c2f4fed08 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -446,7 +446,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; - subflow->remote_id = mp_opt.join_id; + WRITE_ONCE(subflow->remote_id, mp_opt.join_id); pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); @@ -489,8 +489,8 @@ do_reset: static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id) { - subflow->local_id = local_id; - subflow->local_id_valid = 1; + WARN_ON_ONCE(local_id < 0 || local_id > 255); + WRITE_ONCE(subflow->local_id, local_id); } static int subflow_chk_local_id(struct sock *sk) @@ -499,7 +499,7 @@ static int subflow_chk_local_id(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(subflow->conn); int err; - if (likely(subflow->local_id_valid)) + if (likely(subflow->local_id >= 0)) return 0; err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk); @@ -1477,7 +1477,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, remote_token, local_id, remote_id); subflow->remote_token = remote_token; - subflow->remote_id = remote_id; + WRITE_ONCE(subflow->remote_id, remote_id); subflow->request_join = 1; subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); mptcp_info2sockaddr(remote, &addr, ssk->sk_family); @@ -1630,6 +1630,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, pr_debug("subflow=%p", ctx); ctx->tcp_sock = sk; + WRITE_ONCE(ctx->local_id, -1); return ctx; } @@ -1867,13 +1868,13 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->idsn = subflow_req->idsn; /* this is the first subflow, id is always 0 */ - new_ctx->local_id_valid = 1; + subflow_set_local_id(new_ctx, 0); } else if (subflow_req->mp_join) { new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->mp_join = 1; new_ctx->fully_established = 1; new_ctx->backup = subflow_req->backup; - new_ctx->remote_id = subflow_req->remote_id; + WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 55a7f72d547c..edf92074221e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -707,6 +707,22 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct) } EXPORT_SYMBOL(nf_conntrack_destroy); +void nf_ct_set_closing(struct nf_conntrack *nfct) +{ + const struct nf_ct_hook *ct_hook; + + if (!nfct) + return; + + rcu_read_lock(); + ct_hook = rcu_dereference(nf_ct_hook); + if (ct_hook) + ct_hook->set_closing(nfct); + + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(nf_ct_set_closing); + bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) { diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 26ab0e9612d8..9523104a90da 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -28,6 +28,7 @@ #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_list IPSET_TOKEN(MTYPE, _list) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype MTYPE #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) @@ -57,9 +58,6 @@ mtype_destroy(struct ip_set *set) { struct mtype *map = set->data; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); ip_set_free(map->members); @@ -288,6 +286,15 @@ mtype_gc(struct timer_list *t) add_timer(&map->gc); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct mtype *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant mtype = { .kadt = mtype_kadt, .uadt = mtype_uadt, @@ -301,6 +308,7 @@ static const struct ip_set_type_variant mtype = { .head = mtype_head, .list = mtype_list, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, }; #endif /* __IP_SET_BITMAP_IP_GEN_H */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d47dfdcb899b..f645da82d826 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1156,6 +1156,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info, return ret; cleanup: + set->variant->cancel_gc(set); set->variant->destroy(set); put_out: module_put(set->type->me); @@ -1184,6 +1185,14 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } +static void +ip_set_destroy_set_rcu(struct rcu_head *head) +{ + struct ip_set *set = container_of(head, struct ip_set, rcu); + + ip_set_destroy_set(set); +} + static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { @@ -1195,8 +1204,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); /* Commands are serialized and references are * protected by the ip_set_ref_lock. @@ -1208,8 +1215,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, * counter, so if it's already zero, we can proceed * without holding the lock. */ - read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { @@ -1223,6 +1232,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, s = ip_set(inst, i); if (s) { ip_set(inst, i) = NULL; + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); ip_set_destroy_set(s); } } @@ -1230,6 +1241,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, inst->is_destroyed = false; } else { u32 flags = flag_exist(info->nlh); + u16 features = 0; + + read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { @@ -1240,10 +1254,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, ret = -IPSET_ERR_BUSY; goto out; } + features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - - ip_set_destroy_set(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; out: @@ -1396,9 +1416,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); - /* Make sure all readers of the old set pointers are completed. */ - synchronize_rcu(); - return 0; } @@ -2364,6 +2381,7 @@ ip_set_net_exit(struct net *net) set = ip_set(inst, i); if (set) { ip_set(inst, i) = NULL; + set->variant->cancel_gc(set); ip_set_destroy_set(set); } } @@ -2411,8 +2429,11 @@ ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - unregister_pernet_subsys(&ip_set_net_ops); + + /* Wait for call_rcu() in destroy */ + rcu_barrier(); + pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 7499192af586..ef04e556aadb 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -210,6 +210,7 @@ htable_size(u8 hbits) #undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init +#undef mtype_cancel_gc #undef mtype_variant #undef mtype_data_match @@ -254,6 +255,7 @@ htable_size(u8 hbits) #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) @@ -417,7 +419,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) u32 i; for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference(hbucket(t, i)); + n = (__force struct hbucket *)hbucket(t, i); if (!n) continue; if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) @@ -437,10 +439,7 @@ mtype_destroy(struct ip_set *set) struct htype *h = set->data; struct list_head *l, *lt; - if (SET_WITH_TIMEOUT(set)) - cancel_delayed_work_sync(&h->gc.dwork); - - mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); + mtype_ahash_destroy(set, (__force struct htable *)h->table, true); list_for_each_safe(l, lt, &h->ad) { list_del(l); kfree(l); @@ -586,6 +585,15 @@ mtype_gc_init(struct htable_gc *gc) queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct htype *h = set->data; + + if (SET_WITH_TIMEOUT(set)) + cancel_delayed_work_sync(&h->gc.dwork); +} + static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags); @@ -1416,6 +1424,7 @@ static const struct ip_set_type_variant mtype_variant = { .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, .region_lock = true, }; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 5a67f7966574..6bc7019982b0 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set) struct list_set *map = set->data; struct set_elem *e, *n; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - list_for_each_entry_safe(e, n, &map->members, list) { list_del(&e->list); ip_set_put_byindex(map->net, e->id); @@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } +static void +list_set_cancel_gc(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant set_variant = { .kadt = list_set_kadt, .uadt = list_set_uadt, @@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = { .head = list_set_head, .list = list_set_list, .same_set = list_set_same_set, + .cancel_gc = list_set_cancel_gc, }; static void diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 796026296609..024f93fc8c0b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2772,11 +2772,24 @@ err_cachep: return ret; } +static void nf_conntrack_set_closing(struct nf_conntrack *nfct) +{ + struct nf_conn *ct = nf_ct_to_nf_conn(nfct); + + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + nf_conntrack_tcp_set_closing(ct); + break; + } +} + static const struct nf_ct_hook nf_conntrack_hook = { .update = nf_conntrack_update, .destroy = nf_ct_destroy, .get_tuple_skb = nf_conntrack_get_tuple_skb, .attach = nf_conntrack_attach, + .set_closing = nf_conntrack_set_closing, + .confirm = __nf_conntrack_confirm, }; void nf_conntrack_init_end(void) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index e697a824b001..540d97715bd2 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -533,6 +533,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, /* Get fields bitmap */ if (nf_h323_error_boundary(bs, 0, f->sz)) return H323_ERROR_BOUND; + if (f->sz > 32) + return H323_ERROR_RANGE; bmp = get_bitmap(bs, f->sz); if (base) *(unsigned int *)base = bmp; @@ -589,6 +591,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, bmp2_len = get_bits(bs, 7) + 1; if (nf_h323_error_boundary(bs, 0, bmp2_len)) return H323_ERROR_BOUND; + if (bmp2_len > 32) + return H323_ERROR_RANGE; bmp2 = get_bitmap(bs, bmp2_len); bmp |= bmp2 >> f->sz; if (base) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c94a9971d790..7ffd698497f2 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -299,7 +299,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; - } else { + } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) { /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ pr_debug("Setting vtag %x for new conn OOTB\n", diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3ac1af6f59fc..9480e638e5d1 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, - u32 end, u32 win) + u32 end, u32 win, + enum ip_conntrack_dir dir) { /* SYN-ACK in reply to a SYN * or SYN from reply direction in simultaneous open. @@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, * Both sides must send the Window Scale option * to enable window scaling in either direction. */ - if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && + if (dir == IP_CT_DIR_REPLY && + !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) { sender->td_scale = 0; receiver->td_scale = 0; @@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, if (tcph->syn) { tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (!tcph->ack) /* Simultaneous open */ return NFCT_TCP_ACCEPT; @@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, */ tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (dir == IP_CT_DIR_REPLY && !tcph->ack) return NFCT_TCP_ACCEPT; @@ -911,6 +913,41 @@ static bool tcp_can_early_drop(const struct nf_conn *ct) return false; } +void nf_conntrack_tcp_set_closing(struct nf_conn *ct) +{ + enum tcp_conntrack old_state; + const unsigned int *timeouts; + u32 timeout; + + if (!nf_ct_is_confirmed(ct)) + return; + + spin_lock_bh(&ct->lock); + old_state = ct->proto.tcp.state; + ct->proto.tcp.state = TCP_CONNTRACK_CLOSE; + + if (old_state == TCP_CONNTRACK_CLOSE || + test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { + spin_unlock_bh(&ct->lock); + return; + } + + timeouts = nf_ct_timeout_lookup(ct); + if (!timeouts) { + const struct nf_tcp_net *tn; + + tn = nf_tcp_pernet(nf_ct_net(ct)); + timeouts = tn->timeouts; + } + + timeout = timeouts[TCP_CONNTRACK_CLOSE]; + WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp); + + spin_unlock_bh(&ct->lock); + + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); +} + static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state) { state->td_end = 0; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index c1d99cb370b4..99195cf6b265 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) return 0; } +static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route, + enum flow_offload_tuple_dir dir) +{ + struct dst_entry *dst = route->tuple[dir].dst; + + route->tuple[dir].dst = NULL; + + return dst; +} + static int flow_offload_fill_route(struct flow_offload *flow, - const struct nf_flow_route *route, + struct nf_flow_route *route, enum flow_offload_tuple_dir dir) { struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; - struct dst_entry *dst = route->tuple[dir].dst; + struct dst_entry *dst = nft_route_dst_fetch(route, dir); int i, j = 0; switch (flow_tuple->l3proto) { @@ -122,12 +132,10 @@ static int flow_offload_fill_route(struct flow_offload *flow, ETH_ALEN); flow_tuple->out.ifidx = route->tuple[dir].out.ifindex; flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex; + dst_release(dst); break; case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_NEIGH: - if (!dst_hold_safe(route->tuple[dir].dst)) - return -1; - flow_tuple->dst_cache = dst; flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple); break; @@ -148,27 +156,12 @@ static void nft_flow_dst_release(struct flow_offload *flow, dst_release(flow->tuplehash[dir].tuple.dst_cache); } -int flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route) +void flow_offload_route_init(struct flow_offload *flow, + struct nf_flow_route *route) { - int err; - - err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); - if (err < 0) - return err; - - err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); - if (err < 0) - goto err_route_reply; - + flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); + flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); flow->type = NF_FLOW_OFFLOAD_ROUTE; - - return 0; - -err_route_reply: - nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); - - return err; } EXPORT_SYMBOL_GPL(flow_offload_route_init); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8a29290149bd..be93a02497d6 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -193,11 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type) return; } - BUG_ON(loggers[pf][type] == NULL); - rcu_read_lock(); logger = rcu_dereference(loggers[pf][type]); - module_put(logger->me); + if (!logger) + WARN_ON_ONCE(1); + else + module_put(logger->me); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_logger_put); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 86f430e01929..2a5d9075a081 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -24,6 +24,7 @@ #include #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) +#define NFT_SET_MAX_ANONLEN 16 unsigned int nf_tables_net_id __read_mostly; @@ -685,15 +686,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; } -static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, - struct nft_flowtable *flowtable) +static struct nft_trans * +nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, + struct nft_flowtable *flowtable) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_flowtable)); if (trans == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (msg_type == NFT_MSG_NEWFLOWTABLE) nft_activate_next(ctx->net, flowtable); @@ -702,22 +704,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, nft_trans_flowtable(trans) = flowtable; nft_trans_commit_list_add_tail(ctx->net, trans); - return 0; + return trans; } static int nft_delflowtable(struct nft_ctx *ctx, struct nft_flowtable *flowtable) { - int err; + struct nft_trans *trans; - err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); - if (err < 0) - return err; + trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); + if (IS_ERR(trans)) + return PTR_ERR(trans); nft_deactivate_next(ctx->net, flowtable); nft_use_dec(&ctx->table->use); - return err; + return 0; } static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg) @@ -1203,7 +1205,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags & ~NFT_TABLE_F_MASK) return -EOPNOTSUPP; - if (flags == ctx->table->flags) + if (flags == (ctx->table->flags & NFT_TABLE_F_MASK)) return 0; if ((nft_table_has_owner(ctx->table) && @@ -1244,6 +1246,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return 0; err_register_hooks: + ctx->table->flags |= NFT_TABLE_F_DORMANT; nft_trans_destroy(trans); return ret; } @@ -2056,7 +2059,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, struct nft_hook *hook; int err; - hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); + hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); if (!hook) { err = -ENOMEM; goto err_hook_alloc; @@ -2457,19 +2460,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, RCU_INIT_POINTER(chain->blob_gen_0, blob); RCU_INIT_POINTER(chain->blob_gen_1, blob); - err = nf_tables_register_hook(net, table, chain); - if (err < 0) - goto err_destroy_chain; - if (!nft_use_inc(&table->use)) { err = -EMFILE; - goto err_use; + goto err_destroy_chain; } trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); - goto err_unregister_hook; + goto err_trans; } nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET; @@ -2477,17 +2476,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, nft_trans_chain_policy(trans) = policy; err = nft_chain_add(table, chain); - if (err < 0) { - nft_trans_destroy(trans); - goto err_unregister_hook; - } + if (err < 0) + goto err_chain_add; + + /* This must be LAST to ensure no packets are walking over this chain. */ + err = nf_tables_register_hook(net, table, chain); + if (err < 0) + goto err_register_hook; return 0; -err_unregister_hook: +err_register_hook: + nft_chain_del(chain); +err_chain_add: + nft_trans_destroy(trans); +err_trans: nft_use_dec_restore(&table->use); -err_use: - nf_tables_unregister_hook(net, table, chain); err_destroy_chain: nf_tables_chain_destroy(ctx); @@ -4127,6 +4131,9 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, if (p[1] != 'd' || strchr(p + 2, '%')) return -EINVAL; + if (strnlen(name, NFT_SET_MAX_ANONLEN) >= NFT_SET_MAX_ANONLEN) + return -EINVAL; + inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL); if (inuse == NULL) return -ENOMEM; @@ -4704,6 +4711,12 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == (NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; + if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == + (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; + if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) == + (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; } desc.dtype = 0; @@ -4745,6 +4758,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; @@ -4753,6 +4769,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } @@ -5118,6 +5138,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL); @@ -7125,11 +7146,15 @@ nla_put_failure: return -1; } -static const struct nft_object_type *__nft_obj_type_get(u32 objtype) +static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) { const struct nft_object_type *type; list_for_each_entry(type, &nf_tables_objects, list) { + if (type->family != NFPROTO_UNSPEC && + type->family != family) + continue; + if (objtype == type->type) return type; } @@ -7137,11 +7162,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype) } static const struct nft_object_type * -nft_obj_type_get(struct net *net, u32 objtype) +nft_obj_type_get(struct net *net, u32 objtype, u8 family) { const struct nft_object_type *type; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (type != NULL && try_module_get(type->owner)) return type; @@ -7234,7 +7259,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (WARN_ON_ONCE(!type)) return -ENOENT; @@ -7248,7 +7273,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (!nft_use_inc(&table->use)) return -EMFILE; - type = nft_obj_type_get(net, objtype); + type = nft_obj_type_get(net, objtype, family); if (IS_ERR(type)) { err = PTR_ERR(type); goto err_type; @@ -7929,7 +7954,7 @@ err_unregister_net_hooks: return err; } -static void nft_flowtable_hooks_destroy(struct list_head *hook_list) +static void nft_hooks_destroy(struct list_head *hook_list) { struct nft_hook *hook, *next; @@ -8022,9 +8047,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb, u8 family = info->nfmsg->nfgen_family; const struct nf_flowtable_type *type; struct nft_flowtable *flowtable; - struct nft_hook *hook, *next; struct net *net = info->net; struct nft_table *table; + struct nft_trans *trans; struct nft_ctx ctx; int err; @@ -8104,34 +8129,34 @@ static int nf_tables_newflowtable(struct sk_buff *skb, err = nft_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], &flowtable_hook, flowtable, true); if (err < 0) - goto err4; + goto err_flowtable_parse_hooks; list_splice(&flowtable_hook.list, &flowtable->hook_list); flowtable->data.priority = flowtable_hook.priority; flowtable->hooknum = flowtable_hook.num; + trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto err_flowtable_trans; + } + + /* This must be LAST to ensure no packets are walking over this flowtable. */ err = nft_register_flowtable_net_hooks(ctx.net, table, &flowtable->hook_list, flowtable); - if (err < 0) { - nft_flowtable_hooks_destroy(&flowtable->hook_list); - goto err4; - } - - err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); if (err < 0) - goto err5; + goto err_flowtable_hooks; list_add_tail_rcu(&flowtable->list, &table->flowtables); return 0; -err5: - list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - nft_unregister_flowtable_hook(net, flowtable, hook); - list_del_rcu(&hook->list); - kfree_rcu(hook, rcu); - } -err4: + +err_flowtable_hooks: + nft_trans_destroy(trans); +err_flowtable_trans: + nft_hooks_destroy(&flowtable->hook_list); +err_flowtable_parse_hooks: flowtable->data.type->free(&flowtable->data); err3: module_put(type->owner); @@ -8884,7 +8909,7 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELFLOWTABLE: if (nft_trans_flowtable_update(trans)) - nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans)); + nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); else nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; @@ -9841,7 +9866,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) - nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans)); + nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); else nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 680fe557686e..274b6f7e6bb5 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -357,9 +357,10 @@ static int nf_tables_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nft_base_chain *basechain; struct nftables_pernet *nft_net; - struct nft_table *table; struct nft_chain *chain, *nr; + struct nft_table *table; struct nft_ctx ctx = { .net = dev_net(dev), }; @@ -371,7 +372,8 @@ static int nf_tables_netdev_event(struct notifier_block *this, nft_net = nft_pernet(ctx.net); mutex_lock(&nft_net->commit_mutex); list_for_each_entry(table, &nft_net->tables, list) { - if (table->family != NFPROTO_NETDEV) + if (table->family != NFPROTO_NETDEV && + table->family != NFPROTO_INET) continue; ctx.family = table->family; @@ -380,6 +382,11 @@ static int nf_tables_netdev_event(struct notifier_block *this, if (!nft_is_base_chain(chain)) continue; + basechain = nft_base_chain(chain); + if (table->family == NFPROTO_INET && + basechain->ops.hooknum != NF_INET_INGRESS) + continue; + ctx.chain = chain; nft_netdev_event(event, dev, &ctx); } diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index c16172427622..e4b8c02c5e6a 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr, static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = { [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING }, - [NFTA_TARGET_REV] = { .type = NLA_U32 }, + [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_TARGET_INFO] = { .type = NLA_BINARY }, }; @@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) { struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; + u32 l4proto; u32 flags; int err; @@ -212,12 +213,18 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) return -EINVAL; flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS])); - if (flags & ~NFT_RULE_COMPAT_F_MASK) + if (flags & NFT_RULE_COMPAT_F_UNUSED || + flags & ~NFT_RULE_COMPAT_F_MASK) return -EINVAL; if (flags & NFT_RULE_COMPAT_F_INV) *inv = true; - *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + if (l4proto > U16_MAX) + return -EINVAL; + + *proto = l4proto; + return 0; } @@ -349,6 +356,22 @@ static int nft_target_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); @@ -412,7 +435,7 @@ static void nft_match_eval(const struct nft_expr *expr, static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING }, - [NFTA_MATCH_REV] = { .type = NLA_U32 }, + [NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_MATCH_INFO] = { .type = NLA_BINARY }, }; @@ -592,6 +615,22 @@ static int nft_match_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); @@ -709,7 +748,7 @@ out_put: static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = { [NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING, .len = NFT_COMPAT_NAME_MAX-1 }, - [NFTA_COMPAT_REV] = { .type = NLA_U32 }, + [NFTA_COMPAT_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_COMPAT_TYPE] = { .type = NLA_U32 }, }; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 641dc21f92b4..2bfe3cdfbd58 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -484,6 +484,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, break; #endif case NFT_CT_ID: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL; + len = sizeof(u32); break; default: @@ -1231,7 +1234,30 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_EXPECT_L3PROTO]) priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO])); + switch (priv->l3num) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + if (priv->l3num == ctx->family || ctx->family == NFPROTO_INET) + break; + + return -EINVAL; + case NFPROTO_INET: /* tuple.src.l3num supports NFPROTO_IPV4/6 only */ + default: + return -EAFNOSUPPORT; + } + priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]); + switch (priv->l4proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_DCCP: + case IPPROTO_SCTP: + break; + default: + return -EOPNOTSUPP; + } + priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]); priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]); priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 8a43f6f9c90b..7a8707632a81 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -250,10 +250,15 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, break; } - nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); - if (!other_dst) + if (!dst_hold_safe(this_dst)) return -ENOENT; + nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); + if (!other_dst) { + dst_release(this_dst); + return -ENOENT; + } + nft_default_forward_path(route, this_dst, dir); nft_default_forward_path(route, other_dst, !dir); @@ -349,8 +354,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (!flow) goto err_flow_alloc; - if (flow_offload_route_init(flow, &route) < 0) - goto err_flow_add; + flow_offload_route_init(flow, &route); if (tcph) { ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; @@ -361,12 +365,12 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (ret < 0) goto err_flow_add; - dst_release(route.tuple[!dir].dst); return; err_flow_add: flow_offload_free(flow); err_flow_alloc: + dst_release(route.tuple[dir].dst); dst_release(route.tuple[!dir].dst); err_flow_route: clear_bit(IPS_OFFLOAD_BIT, &ct->status); @@ -380,6 +384,11 @@ static int nft_flow_offload_validate(const struct nft_ctx *ctx, { unsigned int hook_mask = (1 << NF_INET_FORWARD); + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, hook_mask); } diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 75c05ef885a9..36ded7d43262 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -58,17 +58,19 @@ static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost) static int nft_limit_init(struct nft_limit_priv *priv, const struct nlattr * const tb[], bool pkts) { + u64 unit, tokens, rate_with_burst; bool invert = false; - u64 unit, tokens; if (tb[NFTA_LIMIT_RATE] == NULL || tb[NFTA_LIMIT_UNIT] == NULL) return -EINVAL; priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + if (priv->rate == 0) + return -EINVAL; + unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); - priv->nsecs = unit * NSEC_PER_SEC; - if (priv->rate == 0 || priv->nsecs < unit) + if (check_mul_overflow(unit, NSEC_PER_SEC, &priv->nsecs)) return -EOVERFLOW; if (tb[NFTA_LIMIT_BURST]) @@ -77,18 +79,25 @@ static int nft_limit_init(struct nft_limit_priv *priv, if (pkts && priv->burst == 0) priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT; - if (priv->rate + priv->burst < priv->rate) + if (check_add_overflow(priv->rate, priv->burst, &rate_with_burst)) return -EOVERFLOW; if (pkts) { - tokens = div64_u64(priv->nsecs, priv->rate) * priv->burst; + u64 tmp = div64_u64(priv->nsecs, priv->rate); + + if (check_mul_overflow(tmp, priv->burst, &tokens)) + return -EOVERFLOW; } else { + u64 tmp; + /* The token bucket size limits the number of tokens can be * accumulated. tokens_max specifies the bucket size. * tokens_max = unit * (rate + burst) / rate. */ - tokens = div64_u64(priv->nsecs * (priv->rate + priv->burst), - priv->rate); + if (check_mul_overflow(priv->nsecs, rate_with_burst, &tmp)) + return -EOVERFLOW; + + tokens = div64_u64(tmp, priv->rate); } if (tb[NFTA_LIMIT_FLAGS]) { diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 353c090f8891..ba7bcce724ef 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -142,6 +142,11 @@ static int nft_nat_validate(const struct nft_ctx *ctx, struct nft_nat *priv = nft_expr_priv(expr); int err; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); if (err < 0) return err; diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 71931ec91721..7d21e16499bf 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -166,6 +166,11 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp const struct nft_rt *priv = nft_expr_priv(expr); unsigned int hooks; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + switch (priv->key) { case NFT_RT_NEXTHOP4: case NFT_RT_NEXTHOP6: diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 4e1cc31729b8..58eca2616273 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -342,9 +342,6 @@ #include "nft_set_pipapo_avx2.h" #include "nft_set_pipapo.h" -/* Current working bitmap index, toggled between field matches */ -static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index); - /** * pipapo_refill() - For each set bit, set bits from selected mapping table item * @map: Bitmap to be scanned for set bits @@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_scratch *scratch; unsigned long *res_map, *fill_map; u8 genmask = nft_genmask_cur(net); const u8 *rp = (const u8 *)key; @@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, local_bh_disable(); - map_index = raw_cpu_read(nft_pipapo_scratch_index); - m = rcu_dereference(priv->match); if (unlikely(!m || !*raw_cpu_ptr(m->scratch))) goto out; - res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0); - fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max); + scratch = *raw_cpu_ptr(m->scratch); + + map_index = scratch->map_index; + + res_map = scratch->map + (map_index ? m->bsize_max : 0); + fill_map = scratch->map + (map_index ? 0 : m->bsize_max); memset(res_map, 0xff, m->bsize_max * sizeof(*res_map)); @@ -460,7 +460,7 @@ next_match: b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt, last); if (b < 0) { - raw_cpu_write(nft_pipapo_scratch_index, map_index); + scratch->map_index = map_index; local_bh_enable(); return false; @@ -477,7 +477,7 @@ next_match: * current inactive bitmap is clean and can be reused as * *next* bitmap (not initial) for the next packet. */ - raw_cpu_write(nft_pipapo_scratch_index, map_index); + scratch->map_index = map_index; local_bh_enable(); return true; @@ -1101,6 +1101,25 @@ static void pipapo_map(struct nft_pipapo_match *m, f->mt[map[i].to + j].e = e; } +/** + * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address + * @m: Matching data + * @cpu: CPU number + */ +static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu) +{ + struct nft_pipapo_scratch *s; + void *mem; + + s = *per_cpu_ptr(m->scratch, cpu); + if (!s) + return; + + mem = s; + mem -= s->align_off; + kfree(mem); +} + /** * pipapo_realloc_scratch() - Reallocate scratch maps for partial match results * @clone: Copy of matching data with pending insertions and deletions @@ -1114,12 +1133,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, int i; for_each_possible_cpu(i) { - unsigned long *scratch; + struct nft_pipapo_scratch *scratch; #ifdef NFT_PIPAPO_ALIGN - unsigned long *scratch_aligned; + void *scratch_aligned; + u32 align_off; #endif - - scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 + + scratch = kzalloc_node(struct_size(scratch, map, + bsize_max * 2) + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL, cpu_to_node(i)); if (!scratch) { @@ -1133,14 +1153,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, return -ENOMEM; } - kfree(*per_cpu_ptr(clone->scratch, i)); - - *per_cpu_ptr(clone->scratch, i) = scratch; + pipapo_free_scratch(clone, i); #ifdef NFT_PIPAPO_ALIGN - scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch); - *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned; + /* Align &scratch->map (not the struct itself): the extra + * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node() + * above guarantee we can waste up to those bytes in order + * to align the map field regardless of its offset within + * the struct. + */ + BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM); + + scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map); + scratch_aligned -= offsetof(struct nft_pipapo_scratch, map); + align_off = scratch_aligned - (void *)scratch; + + scratch = scratch_aligned; + scratch->align_off = align_off; #endif + *per_cpu_ptr(clone->scratch, i) = scratch; } return 0; @@ -1294,11 +1325,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new->scratch) goto out_scratch; -#ifdef NFT_PIPAPO_ALIGN - new->scratch_aligned = alloc_percpu(*new->scratch_aligned); - if (!new->scratch_aligned) - goto out_scratch; -#endif for_each_possible_cpu(i) *per_cpu_ptr(new->scratch, i) = NULL; @@ -1350,10 +1376,7 @@ out_lt: } out_scratch_realloc: for_each_possible_cpu(i) - kfree(*per_cpu_ptr(new->scratch, i)); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(new->scratch_aligned); -#endif + pipapo_free_scratch(new, i); out_scratch: free_percpu(new->scratch); kfree(new); @@ -1638,13 +1661,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m) int i; for_each_possible_cpu(i) - kfree(*per_cpu_ptr(m->scratch, i)); + pipapo_free_scratch(m, i); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif free_percpu(m->scratch); - pipapo_free_fields(m); kfree(m); @@ -2132,7 +2151,7 @@ static int nft_pipapo_init(const struct nft_set *set, m->field_count = field_count; m->bsize_max = 0; - m->scratch = alloc_percpu(unsigned long *); + m->scratch = alloc_percpu(struct nft_pipapo_scratch *); if (!m->scratch) { err = -ENOMEM; goto out_scratch; @@ -2140,16 +2159,6 @@ static int nft_pipapo_init(const struct nft_set *set, for_each_possible_cpu(i) *per_cpu_ptr(m->scratch, i) = NULL; -#ifdef NFT_PIPAPO_ALIGN - m->scratch_aligned = alloc_percpu(unsigned long *); - if (!m->scratch_aligned) { - err = -ENOMEM; - goto out_free; - } - for_each_possible_cpu(i) - *per_cpu_ptr(m->scratch_aligned, i) = NULL; -#endif - rcu_head_init(&m->rcu); nft_pipapo_for_each_field(f, i, m) { @@ -2180,9 +2189,6 @@ static int nft_pipapo_init(const struct nft_set *set, return 0; out_free: -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif free_percpu(m->scratch); out_scratch: kfree(m); @@ -2234,13 +2240,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (m) { rcu_barrier(); - nft_set_pipapo_match_destroy(ctx, set, m); - -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(m->scratch, cpu)); + pipapo_free_scratch(m, cpu); free_percpu(m->scratch); pipapo_free_fields(m); kfree(m); @@ -2250,14 +2251,10 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (priv->clone) { m = priv->clone; - if (priv->dirty) - nft_set_pipapo_match_destroy(ctx, set, m); + nft_set_pipapo_match_destroy(ctx, set, m); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(priv->clone->scratch_aligned); -#endif for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(priv->clone->scratch, cpu)); + pipapo_free_scratch(priv->clone, cpu); free_percpu(priv->clone->scratch); pipapo_free_fields(priv->clone); diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 25a75591583e..30a3d092cd84 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -130,21 +130,29 @@ struct nft_pipapo_field { union nft_pipapo_map_bucket *mt; }; +/** + * struct nft_pipapo_scratch - percpu data used for lookup and matching + * @map_index: Current working bitmap index, toggled between field matches + * @align_off: Offset to get the originally allocated address + * @map: store partial matching results during lookup + */ +struct nft_pipapo_scratch { + u8 map_index; + u32 align_off; + unsigned long map[]; +}; + /** * struct nft_pipapo_match - Data used for lookup and matching * @field_count Amount of fields in set * @scratch: Preallocated per-CPU maps for partial matching results - * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes * @bsize_max: Maximum lookup table bucket size of all fields, in longs * @rcu Matching data is swapped on commits * @f: Fields, with lookup and mapping tables */ struct nft_pipapo_match { int field_count; -#ifdef NFT_PIPAPO_ALIGN - unsigned long * __percpu *scratch_aligned; -#endif - unsigned long * __percpu *scratch; + struct nft_pipapo_scratch * __percpu *scratch; size_t bsize_max; struct rcu_head rcu; struct nft_pipapo_field f[]; diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 52e0d026d30a..a3a8ddca9918 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -57,7 +57,7 @@ /* Jump to label if @reg is zero */ #define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \ - asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ + asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ "je %l[" #label "]" : : : : label) /* Store 256 bits from YMM register into memory. Contrary to bucket load @@ -71,9 +71,6 @@ #define NFT_PIPAPO_AVX2_ZERO(reg) \ asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg) -/* Current working bitmap index, toggled between field matches */ -static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index); - /** * nft_pipapo_avx2_prepare() - Prepare before main algorithm body * @@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_pipapo *priv = nft_set_priv(set); - unsigned long *res, *fill, *scratch; + struct nft_pipapo_scratch *scratch; u8 genmask = nft_genmask_cur(net); const u8 *rp = (const u8 *)key; struct nft_pipapo_match *m; struct nft_pipapo_field *f; + unsigned long *res, *fill; bool map_index; int i, ret = 0; @@ -1141,15 +1139,16 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, */ kernel_fpu_begin_mask(0); - scratch = *raw_cpu_ptr(m->scratch_aligned); + scratch = *raw_cpu_ptr(m->scratch); if (unlikely(!scratch)) { kernel_fpu_end(); return false; } - map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index); - res = scratch + (map_index ? m->bsize_max : 0); - fill = scratch + (map_index ? 0 : m->bsize_max); + map_index = scratch->map_index; + + res = scratch->map + (map_index ? m->bsize_max : 0); + fill = scratch->map + (map_index ? 0 : m->bsize_max); /* Starting map doesn't need to be set for this implementation */ @@ -1221,7 +1220,7 @@ next_match: out: if (i % 2) - raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index); + scratch->map_index = !map_index; kernel_fpu_end(); return ret >= 0; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index e34662f4a71e..5bf5572e945c 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -235,7 +235,7 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, static const struct nft_rbtree_elem * nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, - struct nft_rbtree_elem *rbe, u8 genmask) + struct nft_rbtree_elem *rbe) { struct nft_set *set = (struct nft_set *)__set; struct rb_node *prev = rb_prev(&rbe->node); @@ -254,7 +254,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, while (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); if (nft_rbtree_interval_end(rbe_prev) && - nft_set_elem_active(&rbe_prev->ext, genmask)) + nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY)) break; prev = rb_prev(prev); @@ -365,7 +365,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, nft_set_elem_active(&rbe->ext, cur_genmask)) { const struct nft_rbtree_elem *removed_end; - removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask); + removed_end = nft_rbtree_gc_elem(set, priv, rbe); if (IS_ERR(removed_end)) return PTR_ERR(removed_end); diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 777561b71fcb..f28324fd8d71 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -242,6 +242,11 @@ static int nft_socket_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 6cf9a04fbfe2..a450f28a5ef6 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -186,7 +186,6 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx, break; #endif case NFPROTO_INET: - case NFPROTO_BRIDGE: err = nf_synproxy_ipv4_init(snet, ctx->net); if (err) goto nf_ct_failure; @@ -219,7 +218,6 @@ static void nft_synproxy_do_destroy(const struct nft_ctx *ctx) break; #endif case NFPROTO_INET: - case NFPROTO_BRIDGE: nf_synproxy_ipv4_fini(snet, ctx->net); nf_synproxy_ipv6_fini(snet, ctx->net); break; @@ -253,6 +251,11 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD)); } diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index 62da25ad264b..adb50c39572e 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -316,6 +316,11 @@ static int nft_tproxy_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING); } diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 983ade4be3b3..efb505445eac 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = { static struct nft_object_type nft_tunnel_obj_type __read_mostly = { .type = NFT_OBJECT_TUNNEL, + .family = NFPROTO_NETDEV, .ops = &nft_tunnel_obj_ops, .maxattr = NFTA_TUNNEL_KEY_MAX, .policy = nft_tunnel_key_policy, diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 1c5343c936a8..30259846c352 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -235,6 +235,11 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e const struct nft_xfrm *priv = nft_expr_priv(expr); unsigned int hooks; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + switch (priv->dir) { case XFRM_POLICY_IN: hooks = (1 << NF_INET_FORWARD) | diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index cb833302270a..e9b81cba1e2b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group) static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, gfp_t gfp_mask) { - unsigned int len = skb_end_offset(skb); + unsigned int len = skb->len; struct sk_buff *new; new = alloc_skb(len, gfp_mask); @@ -374,7 +374,7 @@ static void netlink_skb_destructor(struct sk_buff *skb) if (is_vmalloc_addr(skb->head)) { if (!skb->cloned || !atomic_dec_return(&(skb_shinfo(skb)->dataref))) - vfree(skb->head); + vfree_atomic(skb->head); skb->head = NULL; } diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ec5747969f96..f0879295de11 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -453,16 +453,16 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr_init_timers(sk); nr->t1 = - msecs_to_jiffies(sysctl_netrom_transport_timeout); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout)); nr->t2 = - msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay)); nr->n2 = - msecs_to_jiffies(sysctl_netrom_transport_maximum_tries); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = - msecs_to_jiffies(sysctl_netrom_transport_busy_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = - msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); - nr->window = sysctl_netrom_transport_requested_window_size; + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout)); + nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size); nr->bpqext = 1; nr->state = NR_STATE_0; @@ -954,7 +954,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) * G8PZT's Xrouter which is sending packets with command type 7 * as an extension of the protocol. */ - if (sysctl_netrom_reset_circuit && + if (READ_ONCE(sysctl_netrom_reset_circuit) && (frametype != NR_RESET || flags != 0)) nr_transmit_reset(skb, 1); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 3aaac4a22b38..2c34389c3ce6 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -81,7 +81,7 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev, buff[6] |= AX25_SSSID_SPARE; buff += AX25_ADDR_LEN; - *buff++ = sysctl_netrom_network_ttl_initialiser; + *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); *buff++ = NR_PROTO_IP; *buff++ = NR_PROTO_IP; diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 2f084b6f69d7..97944db6b5ac 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -97,7 +97,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -128,7 +128,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -262,7 +262,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c index 44929657f5b7..5e531394a724 100644 --- a/net/netrom/nr_out.c +++ b/net/netrom/nr_out.c @@ -204,7 +204,7 @@ void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (!nr_route_frame(skb, NULL)) { kfree_skb(skb); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index baea3cbd76ca..70480869ad1c 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -153,7 +153,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, nr_neigh->digipeat = NULL; nr_neigh->ax25 = NULL; nr_neigh->dev = dev; - nr_neigh->quality = sysctl_netrom_default_path_quality; + nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality); nr_neigh->locked = 0; nr_neigh->count = 0; nr_neigh->number = nr_neigh_no++; @@ -728,7 +728,7 @@ void nr_link_failed(ax25_cb *ax25, int reason) nr_neigh->ax25 = NULL; ax25_cb_put(ax25); - if (++nr_neigh->failed < sysctl_netrom_link_fails_count) { + if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) { nr_neigh_put(nr_neigh); return; } @@ -766,7 +766,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) if (ax25 != NULL) { ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat, ax25->ax25_dev->dev, 0, - sysctl_netrom_obsolescence_count_initialiser); + READ_ONCE(sysctl_netrom_obsolescence_count_initialiser)); if (ret) return ret; } @@ -780,7 +780,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) return ret; } - if (!sysctl_netrom_routing_control && ax25 != NULL) + if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL) return 0; /* Its Time-To-Live has expired */ diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index e2d2af924cff..c3bbd5880850 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -182,7 +182,8 @@ void nr_write_internal(struct sock *sk, int frametype) *dptr++ = nr->my_id; *dptr++ = frametype; *dptr++ = nr->window; - if (nr->bpqext) *dptr++ = sysctl_netrom_network_ttl_initialiser; + if (nr->bpqext) + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); break; case NR_DISCREQ: @@ -236,7 +237,7 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (mine) { *dptr++ = 0; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 7535afd1537e..b5071a2f597d 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1207,6 +1207,10 @@ void nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); nci_hci_deallocate(ndev); + + /* drop partial rx data packet if present */ + if (ndev->rx_data_reassembly) + kfree_skb(ndev->rx_data_reassembly); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ead5418c126e..e3c85ceb1f0a 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -47,6 +47,7 @@ struct ovs_len_tbl { #define OVS_ATTR_NESTED -1 #define OVS_ATTR_VARIABLE -2 +#define OVS_COPY_ACTIONS_MAX_DEPTH 16 static bool actions_may_change_flow(const struct nlattr *actions) { @@ -2543,13 +2544,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log); + u32 mpls_label_count, bool log, + u32 depth); static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -2600,7 +2603,8 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, return err; err = __ovs_nla_copy_actions(net, actions, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -2615,7 +2619,8 @@ static int validate_and_copy_dec_ttl(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1]; int start, action_start, err, rem; @@ -2658,7 +2663,8 @@ static int validate_and_copy_dec_ttl(struct net *net, return action_start; err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, - vlan_tci, mpls_label_count, log); + vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -2672,7 +2678,8 @@ static int validate_and_copy_clone(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { int start, err; u32 exec; @@ -2692,7 +2699,8 @@ static int validate_and_copy_clone(struct net *net, return err; err = __ovs_nla_copy_actions(net, attr, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3061,7 +3069,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, u32 mpls_label_count, - bool log, bool last) + bool log, bool last, u32 depth) { const struct nlattr *acts_if_greater, *acts_if_lesser_eq; struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1]; @@ -3109,7 +3117,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3122,7 +3131,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3150,12 +3160,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; + if (depth > OVS_COPY_ACTIONS_MAX_DEPTH) + return -EOVERFLOW; + nla_for_each_nested(a, attr, rem) { /* Expected argument lengths, (u32)-1 for variable length. */ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { @@ -3350,7 +3364,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_sample(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3421,7 +3435,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_clone(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3435,7 +3449,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, + depth); if (err) return err; skip_copy = true; @@ -3445,7 +3460,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, case OVS_ACTION_ATTR_DEC_TTL: err = validate_and_copy_dec_ttl(net, a, key, sfa, eth_type, vlan_tci, - mpls_label_count, log); + mpls_label_count, log, + depth); if (err) return err; skip_copy = true; @@ -3485,7 +3501,8 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, (*sfa)->orig_len = nla_len(attr); err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, - key->eth.vlan.tci, mpls_label_count, log); + key->eth.vlan.tci, mpls_label_count, log, + 0); if (err) ovs_nla_free_flow_actions(*sfa); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 51882f07ef70..7188ca8d8469 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3284,7 +3284,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[sizeof(uaddr->sa_data) + 1]; + char name[sizeof(uaddr->sa_data_min) + 1]; /* * Check legality @@ -3295,8 +3295,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. */ - memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); - name[sizeof(uaddr->sa_data)] = 0; + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); + name[sizeof(uaddr->sa_data_min)] = 0; return packet_do_bind(sk, name, 0, 0); } @@ -3566,11 +3566,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) - strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); + strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); rcu_read_unlock(); return sizeof(*uaddr); @@ -3981,7 +3981,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (val < 0 || val > 1) return -EINVAL; - po->prot_hook.ignore_outgoing = !!val; + WRITE_ONCE(po->prot_hook.ignore_outgoing, !!val); return 0; } case PACKET_TX_HAS_OFF: @@ -4110,7 +4110,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, 0); break; case PACKET_IGNORE_OUTGOING: - val = po->prot_hook.ignore_outgoing; + val = READ_ONCE(po->prot_hook.ignore_outgoing); break; case PACKET_ROLLOVER_STATS: if (!po->rollover) diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index ff5f49ab236e..39a6c5713d0b 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -35,10 +35,10 @@ static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg) switch (cmd) { case SIOCINQ: - lock_sock(sk); + spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); answ = skb ? skb->len : 0; - release_sock(sk); + spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(answ, (int __user *)arg); case SIOCPNADDRESOURCE: diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 83ea13a50690..607f54c23647 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len) return 0; } +static unsigned int pep_first_packet_length(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct sk_buff_head *q; + struct sk_buff *skb; + unsigned int len = 0; + bool found = false; + + if (sock_flag(sk, SOCK_URGINLINE)) { + q = &pn->ctrlreq_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) { + len = skb->len; + found = true; + } + spin_unlock_bh(&q->lock); + } + + if (likely(!found)) { + q = &sk->sk_receive_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) + len = skb->len; + spin_unlock_bh(&q->lock); + } + + return len; +} + static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) { struct pep_sock *pn = pep_sk(sk); @@ -930,15 +961,7 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) break; } - lock_sock(sk); - if (sock_flag(sk, SOCK_URGINLINE) && - !skb_queue_empty(&pn->ctrlreq_queue)) - answ = skb_peek(&pn->ctrlreq_queue)->len; - else if (!skb_queue_empty(&sk->sk_receive_queue)) - answ = skb_peek(&sk->sk_receive_queue)->len; - else - answ = 0; - release_sock(sk); + answ = pep_first_packet_length(sk); ret = put_user(answ, (int __user *)arg); break; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 3ff6995244e5..d107f7605db4 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -419,7 +419,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval, rs->rs_rx_traces = trace.rx_traces; for (i = 0; i < rs->rs_rx_traces; i++) { - if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) { + if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) { rs->rs_rx_traces = 0; return -EFAULT; } diff --git a/net/rds/rdma.c b/net/rds/rdma.c index fba82d36593a..a4e3c5de998b 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -301,6 +301,9 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, kfree(sg); } ret = PTR_ERR(trans_private); + /* Trigger connection so that its ready for the next retry */ + if (ret == -ENODEV) + rds_conn_connect_if_down(cp->cp_conn); goto out; } diff --git a/net/rds/send.c b/net/rds/send.c index 0c5504068e3c..0005fb43f2df 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -103,13 +103,12 @@ EXPORT_SYMBOL_GPL(rds_send_path_reset); static int acquire_in_xmit(struct rds_conn_path *cp) { - return test_and_set_bit(RDS_IN_XMIT, &cp->cp_flags) == 0; + return test_and_set_bit_lock(RDS_IN_XMIT, &cp->cp_flags) == 0; } static void release_in_xmit(struct rds_conn_path *cp) { - clear_bit(RDS_IN_XMIT, &cp->cp_flags); - smp_mb__after_atomic(); + clear_bit_unlock(RDS_IN_XMIT, &cp->cp_flags); /* * We don't use wait_on_bit()/wake_up_bit() because our waking is in a * hot path and finding waiters is very rare. We don't want to walk @@ -1314,12 +1313,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Parse any control messages the user may have included. */ ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct); - if (ret) { - /* Trigger connection so that its ready for the next retry */ - if (ret == -EAGAIN) - rds_conn_connect_if_down(conn); + if (ret) goto out; - } if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index aab069701398..5d91ef562ff7 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -41,6 +41,14 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, _enter("%d", conn->debug_id); + if (sp && sp->hdr.type == RXRPC_PACKET_TYPE_ACK) { + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &pkt.ack, sizeof(pkt.ack)) < 0) + return; + if (pkt.ack.reason == RXRPC_ACK_PING_RESPONSE) + return; + } + chan = &conn->channels[channel]; /* If the last call got moved on whilst we were waiting to run, just diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 6e6aa02c6f9e..249353417a18 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -31,7 +31,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer, struct rxrpc_conn_proto k; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rb_node *p; - unsigned int seq = 0; + unsigned int seq = 1; k.epoch = sp->hdr.epoch; k.cid = sp->hdr.cid & RXRPC_CIDMASK; @@ -41,6 +41,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer, * under just the RCU read lock, so we have to check for * changes. */ + seq++; /* 2 on the 1st/lockless path, otherwise odd */ read_seqbegin_or_lock(&peer->service_conn_lock, &seq); p = rcu_dereference_raw(peer->service_conns.rb_node); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 24cf0bf7c80e..9c4a80fce794 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -45,23 +45,6 @@ if NET_SCHED comment "Queueing/Scheduling" -config NET_SCH_CBQ - tristate "Class Based Queueing (CBQ)" - help - Say Y here if you want to use the Class-Based Queueing (CBQ) packet - scheduling algorithm. This algorithm classifies the waiting packets - into a tree-like hierarchy of classes; the leaves of this tree are - in turn scheduled by separate algorithms. - - See the top of for more details. - - CBQ is a commonly used scheduler, so if you're unsure, you should - say Y here. Then say Y to all the queueing algorithms below that you - want to use as leaf disciplines. - - To compile this code as a module, choose M here: the - module will be called sch_cbq. - config NET_SCH_HTB tristate "Hierarchical Token Bucket (HTB)" help @@ -85,20 +68,6 @@ config NET_SCH_HFSC To compile this code as a module, choose M here: the module will be called sch_hfsc. -config NET_SCH_ATM - tristate "ATM Virtual Circuits (ATM)" - depends on ATM - help - Say Y here if you want to use the ATM pseudo-scheduler. This - provides a framework for invoking classifiers, which in turn - select classes of this queuing discipline. Each class maps - the flow(s) it is handling to a given virtual circuit. - - See the top of for more details. - - To compile this code as a module, choose M here: the - module will be called sch_atm. - config NET_SCH_PRIO tristate "Multi Band Priority Queueing (PRIO)" help @@ -217,17 +186,6 @@ config NET_SCH_GRED To compile this code as a module, choose M here: the module will be called sch_gred. -config NET_SCH_DSMARK - tristate "Differentiated Services marker (DSMARK)" - help - Say Y if you want to schedule packets according to the - Differentiated Services architecture proposed in RFC 2475. - Technical information on this method, with pointers to associated - RFCs, is available at . - - To compile this code as a module, choose M here: the - module will be called sch_dsmark. - config NET_SCH_NETEM tristate "Network emulator (NETEM)" help diff --git a/net/sched/Makefile b/net/sched/Makefile index 8a33a35fc50d..a66ac1e7b79b 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -33,20 +33,17 @@ obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_ACT_CT) += act_ct.o obj-$(CONFIG_NET_ACT_GATE) += act_gate.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o -obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o obj-$(CONFIG_NET_SCH_RED) += sch_red.o obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o -obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o -obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c deleted file mode 100644 index 4a981ca90b0b..000000000000 --- a/net/sched/sch_atm.c +++ /dev/null @@ -1,706 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */ - -/* Written 1998-2000 by Werner Almesberger, EPFL ICA */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for fput */ -#include -#include -#include - -/* - * The ATM queuing discipline provides a framework for invoking classifiers - * (aka "filters"), which in turn select classes of this queuing discipline. - * Each class maps the flow(s) it is handling to a given VC. Multiple classes - * may share the same VC. - * - * When creating a class, VCs are specified by passing the number of the open - * socket descriptor by which the calling process references the VC. The kernel - * keeps the VC open at least until all classes using it are removed. - * - * In this file, most functions are named atm_tc_* to avoid confusion with all - * the atm_* in net/atm. This naming convention differs from what's used in the - * rest of net/sched. - * - * Known bugs: - * - sometimes messes up the IP stack - * - any manipulations besides the few operations described in the README, are - * untested and likely to crash the system - * - should lock the flow while there is data in the queue (?) - */ - -#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) - -struct atm_flow_data { - struct Qdisc_class_common common; - struct Qdisc *q; /* FIFO, TBF, etc. */ - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ - void (*old_pop)(struct atm_vcc *vcc, - struct sk_buff *skb); /* chaining */ - struct atm_qdisc_data *parent; /* parent qdisc */ - struct socket *sock; /* for closing */ - int ref; /* reference count */ - struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct list_head list; - struct atm_flow_data *excess; /* flow for excess traffic; - NULL to set CLP instead */ - int hdr_len; - unsigned char hdr[]; /* header data; MUST BE LAST */ -}; - -struct atm_qdisc_data { - struct atm_flow_data link; /* unclassified skbs go here */ - struct list_head flows; /* NB: "link" is also on this - list */ - struct tasklet_struct task; /* dequeue tasklet */ -}; - -/* ------------------------- Class/flow operations ------------------------- */ - -static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - list_for_each_entry(flow, &p->flows, list) { - if (flow->common.classid == classid) - return flow; - } - return NULL; -} - -static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", - sch, p, flow, new, old); - if (list_empty(&flow->list)) - return -EINVAL; - if (!new) - new = &noop_qdisc; - *old = flow->q; - flow->q = new; - if (*old) - qdisc_reset(*old); - return 0; -} - -static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl) -{ - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow); - return flow ? flow->q : NULL; -} - -static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid) -{ - struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); - flow = lookup_flow(sch, classid); - pr_debug("%s: flow %p\n", __func__, flow); - return (unsigned long)flow; -} - -static unsigned long atm_tc_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) -{ - struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); - flow = lookup_flow(sch, classid); - if (flow) - flow->ref++; - pr_debug("%s: flow %p\n", __func__, flow); - return (unsigned long)flow; -} - -/* - * atm_tc_put handles all destructions, including the ones that are explicitly - * requested (atm_tc_destroy, etc.). The assumption here is that we never drop - * anything that still seems to be in use. - */ -static void atm_tc_put(struct Qdisc *sch, unsigned long cl) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - if (--flow->ref) - return; - pr_debug("atm_tc_put: destroying\n"); - list_del_init(&flow->list); - pr_debug("atm_tc_put: qdisc %p\n", flow->q); - qdisc_put(flow->q); - tcf_block_put(flow->block); - if (flow->sock) { - pr_debug("atm_tc_put: f_count %ld\n", - file_count(flow->sock->file)); - flow->vcc->pop = flow->old_pop; - sockfd_put(flow->sock); - } - if (flow->excess) - atm_tc_put(sch, (unsigned long)flow->excess); - if (flow != &p->link) - kfree(flow); - /* - * If flow == &p->link, the qdisc no longer works at this point and - * needs to be removed. (By the caller of atm_tc_put.) - */ -} - -static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb) -{ - struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent; - - pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p); - VCC2FLOW(vcc)->old_pop(vcc, skb); - tasklet_schedule(&p->task); -} - -static const u8 llc_oui_ip[] = { - 0xaa, /* DSAP: non-ISO */ - 0xaa, /* SSAP: non-ISO */ - 0x03, /* Ctrl: Unnumbered Information Command PDU */ - 0x00, /* OUI: EtherType */ - 0x00, 0x00, - 0x08, 0x00 -}; /* Ethertype IP (0800) */ - -static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = { - [TCA_ATM_FD] = { .type = NLA_U32 }, - [TCA_ATM_EXCESS] = { .type = NLA_U32 }, -}; - -static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)*arg; - struct atm_flow_data *excess = NULL; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_ATM_MAX + 1]; - struct socket *sock; - int fd, error, hdr_len; - void *hdr; - - pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x," - "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt); - /* - * The concept of parents doesn't apply for this qdisc. - */ - if (parent && parent != TC_H_ROOT && parent != sch->handle) - return -EINVAL; - /* - * ATM classes cannot be changed. In order to change properties of the - * ATM connection, that socket needs to be modified directly (via the - * native ATM API. In order to send a flow to a different VC, the old - * class needs to be removed and a new one added. (This may be changed - * later.) - */ - if (flow) - return -EBUSY; - if (opt == NULL) - return -EINVAL; - - error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy, - NULL); - if (error < 0) - return error; - - if (!tb[TCA_ATM_FD]) - return -EINVAL; - fd = nla_get_u32(tb[TCA_ATM_FD]); - pr_debug("atm_tc_change: fd %d\n", fd); - if (tb[TCA_ATM_HDR]) { - hdr_len = nla_len(tb[TCA_ATM_HDR]); - hdr = nla_data(tb[TCA_ATM_HDR]); - } else { - hdr_len = RFC1483LLC_LEN; - hdr = NULL; /* default LLC/SNAP for IP */ - } - if (!tb[TCA_ATM_EXCESS]) - excess = NULL; - else { - excess = (struct atm_flow_data *) - atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS])); - if (!excess) - return -ENOENT; - } - pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n", - opt->nla_type, nla_len(opt), hdr_len); - sock = sockfd_lookup(fd, &error); - if (!sock) - return error; /* f_count++ */ - pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file)); - if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { - error = -EPROTOTYPE; - goto err_out; - } - /* @@@ should check if the socket is really operational or we'll crash - on vcc->send */ - if (classid) { - if (TC_H_MAJ(classid ^ sch->handle)) { - pr_debug("atm_tc_change: classid mismatch\n"); - error = -EINVAL; - goto err_out; - } - } else { - int i; - unsigned long cl; - - for (i = 1; i < 0x8000; i++) { - classid = TC_H_MAKE(sch->handle, 0x8000 | i); - cl = atm_tc_find(sch, classid); - if (!cl) - break; - } - } - pr_debug("atm_tc_change: new id %x\n", classid); - flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); - pr_debug("atm_tc_change: flow %p\n", flow); - if (!flow) { - error = -ENOBUFS; - goto err_out; - } - - error = tcf_block_get(&flow->block, &flow->filter_list, sch, - extack); - if (error) { - kfree(flow); - goto err_out; - } - - flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, - extack); - if (!flow->q) - flow->q = &noop_qdisc; - pr_debug("atm_tc_change: qdisc %p\n", flow->q); - flow->sock = sock; - flow->vcc = ATM_SD(sock); /* speedup */ - flow->vcc->user_back = flow; - pr_debug("atm_tc_change: vcc %p\n", flow->vcc); - flow->old_pop = flow->vcc->pop; - flow->parent = p; - flow->vcc->pop = sch_atm_pop; - flow->common.classid = classid; - flow->ref = 1; - flow->excess = excess; - list_add(&flow->list, &p->link.list); - flow->hdr_len = hdr_len; - if (hdr) - memcpy(flow->hdr, hdr, hdr_len); - else - memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip)); - *arg = (unsigned long)flow; - return 0; -err_out: - sockfd_put(sock); - return error; -} - -static int atm_tc_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - if (list_empty(&flow->list)) - return -EINVAL; - if (rcu_access_pointer(flow->filter_list) || flow == &p->link) - return -EBUSY; - /* - * Reference count must be 2: one for "keepalive" (set at class - * creation), and one for the reference held when calling delete. - */ - if (flow->ref < 2) { - pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref); - return -EINVAL; - } - if (flow->ref > 2) - return -EBUSY; /* catch references via excess, etc. */ - atm_tc_put(sch, arg); - return 0; -} - -static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); - if (walker->stop) - return; - list_for_each_entry(flow, &p->flows, list) { - if (!tc_qdisc_stats_dump(sch, (unsigned long)flow, walker)) - break; - } -} - -static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - return flow ? flow->block : p->link.block; -} - -/* --------------------------- Qdisc operations ---------------------------- */ - -static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - struct tcf_result res; - int result; - int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - - pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - result = TC_ACT_OK; /* be nice to gcc */ - flow = NULL; - if (TC_H_MAJ(skb->priority) != sch->handle || - !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) { - struct tcf_proto *fl; - - list_for_each_entry(flow, &p->flows, list) { - fl = rcu_dereference_bh(flow->filter_list); - if (fl) { - result = tcf_classify(skb, NULL, fl, &res, true); - if (result < 0) - continue; - if (result == TC_ACT_SHOT) - goto done; - - flow = (struct atm_flow_data *)res.class; - if (!flow) - flow = lookup_flow(sch, res.classid); - goto drop; - } - } - flow = NULL; -done: - ; - } - if (!flow) { - flow = &p->link; - } else { - if (flow->vcc) - ATM_SKB(skb)->atm_options = flow->vcc->atm_options; - /*@@@ looks good ... but it's not supposed to work :-) */ -#ifdef CONFIG_NET_CLS_ACT - switch (result) { - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - __qdisc_drop(skb, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - case TC_ACT_SHOT: - __qdisc_drop(skb, to_free); - goto drop; - case TC_ACT_RECLASSIFY: - if (flow->excess) - flow = flow->excess; - else - ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; - break; - } -#endif - } - - ret = qdisc_enqueue(skb, flow->q, to_free); - if (ret != NET_XMIT_SUCCESS) { -drop: __maybe_unused - if (net_xmit_drop_count(ret)) { - qdisc_qstats_drop(sch); - if (flow) - flow->qstats.drops++; - } - return ret; - } - /* - * Okay, this may seem weird. We pretend we've dropped the packet if - * it goes via ATM. The reason for this is that the outer qdisc - * expects to be able to q->dequeue the packet later on if we return - * success at this place. Also, sch->q.qdisc needs to reflect whether - * there is a packet egligible for dequeuing or not. Note that the - * statistics of the outer qdisc are necessarily wrong because of all - * this. There's currently no correct solution for this. - */ - if (flow == &p->link) { - sch->q.qlen++; - return NET_XMIT_SUCCESS; - } - tasklet_schedule(&p->task); - return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; -} - -/* - * Dequeue packets and send them over ATM. Note that we quite deliberately - * avoid checking net_device's flow control here, simply because sch_atm - * uses its own channels, which have nothing to do with any CLIP/LANE/or - * non-ATM interfaces. - */ - -static void sch_atm_dequeue(struct tasklet_struct *t) -{ - struct atm_qdisc_data *p = from_tasklet(p, t, task); - struct Qdisc *sch = qdisc_from_priv(p); - struct atm_flow_data *flow; - struct sk_buff *skb; - - pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) { - if (flow == &p->link) - continue; - /* - * If traffic is properly shaped, this won't generate nasty - * little bursts. Otherwise, it may ... (but that's okay) - */ - while ((skb = flow->q->ops->peek(flow->q))) { - if (!atm_may_send(flow->vcc, skb->truesize)) - break; - - skb = qdisc_dequeue_peeked(flow->q); - if (unlikely(!skb)) - break; - - qdisc_bstats_update(sch, skb); - bstats_update(&flow->bstats, skb); - pr_debug("atm_tc_dequeue: sending on class %p\n", flow); - /* remove any LL header somebody else has attached */ - skb_pull(skb, skb_network_offset(skb)); - if (skb_headroom(skb) < flow->hdr_len) { - struct sk_buff *new; - - new = skb_realloc_headroom(skb, flow->hdr_len); - dev_kfree_skb(skb); - if (!new) - continue; - skb = new; - } - pr_debug("sch_atm_dequeue: ip %p, data %p\n", - skb_network_header(skb), skb->data); - ATM_SKB(skb)->vcc = flow->vcc; - memcpy(skb_push(skb, flow->hdr_len), flow->hdr, - flow->hdr_len); - refcount_add(skb->truesize, - &sk_atm(flow->vcc)->sk_wmem_alloc); - /* atm.atm_options are already set by atm_tc_enqueue */ - flow->vcc->send(flow->vcc, skb); - } - } -} - -static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct sk_buff *skb; - - pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); - tasklet_schedule(&p->task); - skb = qdisc_dequeue_peeked(p->link.q); - if (skb) - sch->q.qlen--; - return skb; -} - -static struct sk_buff *atm_tc_peek(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - - pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p); - - return p->link.q->ops->peek(p->link.q); -} - -static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); - INIT_LIST_HEAD(&p->flows); - INIT_LIST_HEAD(&p->link.list); - gnet_stats_basic_sync_init(&p->link.bstats); - list_add(&p->link.list, &p->flows); - p->link.q = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, sch->handle, extack); - if (!p->link.q) - p->link.q = &noop_qdisc; - pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); - p->link.vcc = NULL; - p->link.sock = NULL; - p->link.common.classid = sch->handle; - p->link.ref = 1; - - err = tcf_block_get(&p->link.block, &p->link.filter_list, sch, - extack); - if (err) - return err; - - tasklet_setup(&p->task, sch_atm_dequeue); - return 0; -} - -static void atm_tc_reset(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) - qdisc_reset(flow->q); -} - -static void atm_tc_destroy(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow, *tmp; - - pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) { - tcf_block_put(flow->block); - flow->block = NULL; - } - - list_for_each_entry_safe(flow, tmp, &p->flows, list) { - if (flow->ref > 1) - pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref); - atm_tc_put(sch, (unsigned long)flow); - } - tasklet_kill(&p->task); -} - -static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - struct nlattr *nest; - - pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", - sch, p, flow, skb, tcm); - if (list_empty(&flow->list)) - return -EINVAL; - tcm->tcm_handle = flow->common.classid; - tcm->tcm_info = flow->q->handle; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - - if (nla_put(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr)) - goto nla_put_failure; - if (flow->vcc) { - struct sockaddr_atmpvc pvc; - int state; - - memset(&pvc, 0, sizeof(pvc)); - pvc.sap_family = AF_ATMPVC; - pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; - pvc.sap_addr.vpi = flow->vcc->vpi; - pvc.sap_addr.vci = flow->vcc->vci; - if (nla_put(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc)) - goto nla_put_failure; - state = ATM_VF2VS(flow->vcc->flags); - if (nla_put_u32(skb, TCA_ATM_STATE, state)) - goto nla_put_failure; - } - if (flow->excess) { - if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid)) - goto nla_put_failure; - } else { - if (nla_put_u32(skb, TCA_ATM_EXCESS, 0)) - goto nla_put_failure; - } - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} -static int -atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) -{ - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 || - gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0) - return -1; - - return 0; -} - -static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - return 0; -} - -static const struct Qdisc_class_ops atm_class_ops = { - .graft = atm_tc_graft, - .leaf = atm_tc_leaf, - .find = atm_tc_find, - .change = atm_tc_change, - .delete = atm_tc_delete, - .walk = atm_tc_walk, - .tcf_block = atm_tc_tcf_block, - .bind_tcf = atm_tc_bind_filter, - .unbind_tcf = atm_tc_put, - .dump = atm_tc_dump_class, - .dump_stats = atm_tc_dump_class_stats, -}; - -static struct Qdisc_ops atm_qdisc_ops __read_mostly = { - .cl_ops = &atm_class_ops, - .id = "atm", - .priv_size = sizeof(struct atm_qdisc_data), - .enqueue = atm_tc_enqueue, - .dequeue = atm_tc_dequeue, - .peek = atm_tc_peek, - .init = atm_tc_init, - .reset = atm_tc_reset, - .destroy = atm_tc_destroy, - .dump = atm_tc_dump, - .owner = THIS_MODULE, -}; - -static int __init atm_init(void) -{ - return register_qdisc(&atm_qdisc_ops); -} - -static void __exit atm_exit(void) -{ - unregister_qdisc(&atm_qdisc_ops); -} - -module_init(atm_init) -module_exit(atm_exit) -MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c deleted file mode 100644 index 36db5f6782f2..000000000000 --- a/net/sched/sch_cbq.c +++ /dev/null @@ -1,1727 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/sched/sch_cbq.c Class-Based Queueing discipline. - * - * Authors: Alexey Kuznetsov, - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* Class-Based Queueing (CBQ) algorithm. - ======================================= - - Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource - Management Models for Packet Networks", - IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995 - - [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995 - - [3] Sally Floyd, "Notes on Class-Based Queueing: Setting - Parameters", 1996 - - [4] Sally Floyd and Michael Speer, "Experimental Results - for Class-Based Queueing", 1998, not published. - - ----------------------------------------------------------------------- - - Algorithm skeleton was taken from NS simulator cbq.cc. - If someone wants to check this code against the LBL version, - he should take into account that ONLY the skeleton was borrowed, - the implementation is different. Particularly: - - --- The WRR algorithm is different. Our version looks more - reasonable (I hope) and works when quanta are allowed to be - less than MTU, which is always the case when real time classes - have small rates. Note, that the statement of [3] is - incomplete, delay may actually be estimated even if class - per-round allotment is less than MTU. Namely, if per-round - allotment is W*r_i, and r_1+...+r_k = r < 1 - - delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B - - In the worst case we have IntServ estimate with D = W*r+k*MTU - and C = MTU*r. The proof (if correct at all) is trivial. - - - --- It seems that cbq-2.0 is not very accurate. At least, I cannot - interpret some places, which look like wrong translations - from NS. Anyone is advised to find these differences - and explain to me, why I am wrong 8). - - --- Linux has no EOI event, so that we cannot estimate true class - idle time. Workaround is to consider the next dequeue event - as sign that previous packet is finished. This is wrong because of - internal device queueing, but on a permanently loaded link it is true. - Moreover, combined with clock integrator, this scheme looks - very close to an ideal solution. */ - -struct cbq_sched_data; - - -struct cbq_class { - struct Qdisc_class_common common; - struct cbq_class *next_alive; /* next class with backlog in this priority band */ - -/* Parameters */ - unsigned char priority; /* class priority */ - unsigned char priority2; /* priority to be used after overlimit */ - unsigned char ewma_log; /* time constant for idle time calculation */ - - u32 defmap; - - /* Link-sharing scheduler parameters */ - long maxidle; /* Class parameters: see below. */ - long offtime; - long minidle; - u32 avpkt; - struct qdisc_rate_table *R_tab; - - /* General scheduler (WRR) parameters */ - long allot; - long quantum; /* Allotment per WRR round */ - long weight; /* Relative allotment: see below */ - - struct Qdisc *qdisc; /* Ptr to CBQ discipline */ - struct cbq_class *split; /* Ptr to split node */ - struct cbq_class *share; /* Ptr to LS parent in the class tree */ - struct cbq_class *tparent; /* Ptr to tree parent in the class tree */ - struct cbq_class *borrow; /* NULL if class is bandwidth limited; - parent otherwise */ - struct cbq_class *sibling; /* Sibling chain */ - struct cbq_class *children; /* Pointer to children chain */ - - struct Qdisc *q; /* Elementary queueing discipline */ - - -/* Variables */ - unsigned char cpriority; /* Effective priority */ - unsigned char delayed; - unsigned char level; /* level of the class in hierarchy: - 0 for leaf classes, and maximal - level of children + 1 for nodes. - */ - - psched_time_t last; /* Last end of service */ - psched_time_t undertime; - long avgidle; - long deficit; /* Saved deficit for WRR */ - psched_time_t penalized; - struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct net_rate_estimator __rcu *rate_est; - struct tc_cbq_xstats xstats; - - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - - int filters; - - struct cbq_class *defaults[TC_PRIO_MAX + 1]; -}; - -struct cbq_sched_data { - struct Qdisc_class_hash clhash; /* Hash table of all classes */ - int nclasses[TC_CBQ_MAXPRIO + 1]; - unsigned int quanta[TC_CBQ_MAXPRIO + 1]; - - struct cbq_class link; - - unsigned int activemask; - struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes - with backlog */ - -#ifdef CONFIG_NET_CLS_ACT - struct cbq_class *rx_class; -#endif - struct cbq_class *tx_class; - struct cbq_class *tx_borrowed; - int tx_len; - psched_time_t now; /* Cached timestamp */ - unsigned int pmask; - - struct qdisc_watchdog watchdog; /* Watchdog timer, - started when CBQ has - backlog, but cannot - transmit just now */ - psched_tdiff_t wd_expires; - int toplevel; - u32 hgenerator; -}; - - -#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len) - -static inline struct cbq_class * -cbq_class_lookup(struct cbq_sched_data *q, u32 classid) -{ - struct Qdisc_class_common *clc; - - clc = qdisc_class_find(&q->clhash, classid); - if (clc == NULL) - return NULL; - return container_of(clc, struct cbq_class, common); -} - -#ifdef CONFIG_NET_CLS_ACT - -static struct cbq_class * -cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) -{ - struct cbq_class *cl; - - for (cl = this->tparent; cl; cl = cl->tparent) { - struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT]; - - if (new != NULL && new != this) - return new; - } - return NULL; -} - -#endif - -/* Classify packet. The procedure is pretty complicated, but - * it allows us to combine link sharing and priority scheduling - * transparently. - * - * Namely, you can put link sharing rules (f.e. route based) at root of CBQ, - * so that it resolves to split nodes. Then packets are classified - * by logical priority, or a more specific classifier may be attached - * to the split node. - */ - -static struct cbq_class * -cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *head = &q->link; - struct cbq_class **defmap; - struct cbq_class *cl = NULL; - u32 prio = skb->priority; - struct tcf_proto *fl; - struct tcf_result res; - - /* - * Step 1. If skb->priority points to one of our classes, use it. - */ - if (TC_H_MAJ(prio ^ sch->handle) == 0 && - (cl = cbq_class_lookup(q, prio)) != NULL) - return cl; - - *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - for (;;) { - int result = 0; - defmap = head->defaults; - - fl = rcu_dereference_bh(head->filter_list); - /* - * Step 2+n. Apply classifier. - */ - result = tcf_classify(skb, NULL, fl, &res, true); - if (!fl || result < 0) - goto fallback; - if (result == TC_ACT_SHOT) - return NULL; - - cl = (void *)res.class; - if (!cl) { - if (TC_H_MAJ(res.classid)) - cl = cbq_class_lookup(q, res.classid); - else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) - cl = defmap[TC_PRIO_BESTEFFORT]; - - if (cl == NULL) - goto fallback; - } - if (cl->level >= head->level) - goto fallback; -#ifdef CONFIG_NET_CLS_ACT - switch (result) { - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - fallthrough; - case TC_ACT_RECLASSIFY: - return cbq_reclassify(skb, cl); - } -#endif - if (cl->level == 0) - return cl; - - /* - * Step 3+n. If classifier selected a link sharing class, - * apply agency specific classifier. - * Repeat this procedure until we hit a leaf node. - */ - head = cl; - } - -fallback: - cl = head; - - /* - * Step 4. No success... - */ - if (TC_H_MAJ(prio) == 0 && - !(cl = head->defaults[prio & TC_PRIO_MAX]) && - !(cl = head->defaults[TC_PRIO_BESTEFFORT])) - return head; - - return cl; -} - -/* - * A packet has just been enqueued on the empty class. - * cbq_activate_class adds it to the tail of active class list - * of its priority band. - */ - -static inline void cbq_activate_class(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - int prio = cl->cpriority; - struct cbq_class *cl_tail; - - cl_tail = q->active[prio]; - q->active[prio] = cl; - - if (cl_tail != NULL) { - cl->next_alive = cl_tail->next_alive; - cl_tail->next_alive = cl; - } else { - cl->next_alive = cl; - q->activemask |= (1<qdisc); - int prio = this->cpriority; - struct cbq_class *cl; - struct cbq_class *cl_prev = q->active[prio]; - - do { - cl = cl_prev->next_alive; - if (cl == this) { - cl_prev->next_alive = cl->next_alive; - cl->next_alive = NULL; - - if (cl == q->active[prio]) { - q->active[prio] = cl_prev; - if (cl == q->active[prio]) { - q->active[prio] = NULL; - q->activemask &= ~(1<active[prio]); -} - -static void -cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) -{ - int toplevel = q->toplevel; - - if (toplevel > cl->level) { - psched_time_t now = psched_get_time(); - - do { - if (cl->undertime < now) { - q->toplevel = cl->level; - return; - } - } while ((cl = cl->borrow) != NULL && toplevel > cl->level); - } -} - -static int -cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - int ret; - struct cbq_class *cl = cbq_classify(skb, sch, &ret); - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = cl; -#endif - if (cl == NULL) { - if (ret & __NET_XMIT_BYPASS) - qdisc_qstats_drop(sch); - __qdisc_drop(skb, to_free); - return ret; - } - - ret = qdisc_enqueue(skb, cl->q, to_free); - if (ret == NET_XMIT_SUCCESS) { - sch->q.qlen++; - cbq_mark_toplevel(q, cl); - if (!cl->next_alive) - cbq_activate_class(cl); - return ret; - } - - if (net_xmit_drop_count(ret)) { - qdisc_qstats_drop(sch); - cbq_mark_toplevel(q, cl); - cl->qstats.drops++; - } - return ret; -} - -/* Overlimit action: penalize leaf class by adding offtime */ -static void cbq_overlimit(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - psched_tdiff_t delay = cl->undertime - q->now; - - if (!cl->delayed) { - delay += cl->offtime; - - /* - * Class goes to sleep, so that it will have no - * chance to work avgidle. Let's forgive it 8) - * - * BTW cbq-2.0 has a crap in this - * place, apparently they forgot to shift it by cl->ewma_log. - */ - if (cl->avgidle < 0) - delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); - if (cl->avgidle < cl->minidle) - cl->avgidle = cl->minidle; - if (delay <= 0) - delay = 1; - cl->undertime = q->now + delay; - - cl->xstats.overactions++; - cl->delayed = 1; - } - if (q->wd_expires == 0 || q->wd_expires > delay) - q->wd_expires = delay; - - /* Dirty work! We must schedule wakeups based on - * real available rate, rather than leaf rate, - * which may be tiny (even zero). - */ - if (q->toplevel == TC_CBQ_MAXLEVEL) { - struct cbq_class *b; - psched_tdiff_t base_delay = q->wd_expires; - - for (b = cl->borrow; b; b = b->borrow) { - delay = b->undertime - q->now; - if (delay < base_delay) { - if (delay <= 0) - delay = 1; - base_delay = delay; - } - } - - q->wd_expires = base_delay; - } -} - -/* - * It is mission critical procedure. - * - * We "regenerate" toplevel cutoff, if transmitting class - * has backlog and it is not regulated. It is not part of - * original CBQ description, but looks more reasonable. - * Probably, it is wrong. This question needs further investigation. - */ - -static inline void -cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, - struct cbq_class *borrowed) -{ - if (cl && q->toplevel >= borrowed->level) { - if (cl->q->q.qlen > 1) { - do { - if (borrowed->undertime == PSCHED_PASTPERFECT) { - q->toplevel = borrowed->level; - return; - } - } while ((borrowed = borrowed->borrow) != NULL); - } -#if 0 - /* It is not necessary now. Uncommenting it - will save CPU cycles, but decrease fairness. - */ - q->toplevel = TC_CBQ_MAXLEVEL; -#endif - } -} - -static void -cbq_update(struct cbq_sched_data *q) -{ - struct cbq_class *this = q->tx_class; - struct cbq_class *cl = this; - int len = q->tx_len; - psched_time_t now; - - q->tx_class = NULL; - /* Time integrator. We calculate EOS time - * by adding expected packet transmission time. - */ - now = q->now + L2T(&q->link, len); - - for ( ; cl; cl = cl->share) { - long avgidle = cl->avgidle; - long idle; - - _bstats_update(&cl->bstats, len, 1); - - /* - * (now - last) is total time between packet right edges. - * (last_pktlen/rate) is "virtual" busy time, so that - * - * idle = (now - last) - last_pktlen/rate - */ - - idle = now - cl->last; - if ((unsigned long)idle > 128*1024*1024) { - avgidle = cl->maxidle; - } else { - idle -= L2T(cl, len); - - /* true_avgidle := (1-W)*true_avgidle + W*idle, - * where W=2^{-ewma_log}. But cl->avgidle is scaled: - * cl->avgidle == true_avgidle/W, - * hence: - */ - avgidle += idle - (avgidle>>cl->ewma_log); - } - - if (avgidle <= 0) { - /* Overlimit or at-limit */ - - if (avgidle < cl->minidle) - avgidle = cl->minidle; - - cl->avgidle = avgidle; - - /* Calculate expected time, when this class - * will be allowed to send. - * It will occur, when: - * (1-W)*true_avgidle + W*delay = 0, i.e. - * idle = (1/W - 1)*(-true_avgidle) - * or - * idle = (1 - W)*(-cl->avgidle); - */ - idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); - - /* - * That is not all. - * To maintain the rate allocated to the class, - * we add to undertime virtual clock, - * necessary to complete transmitted packet. - * (len/phys_bandwidth has been already passed - * to the moment of cbq_update) - */ - - idle -= L2T(&q->link, len); - idle += L2T(cl, len); - - cl->undertime = now + idle; - } else { - /* Underlimit */ - - cl->undertime = PSCHED_PASTPERFECT; - if (avgidle > cl->maxidle) - cl->avgidle = cl->maxidle; - else - cl->avgidle = avgidle; - } - if ((s64)(now - cl->last) > 0) - cl->last = now; - } - - cbq_update_toplevel(q, this, q->tx_borrowed); -} - -static inline struct cbq_class * -cbq_under_limit(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - struct cbq_class *this_cl = cl; - - if (cl->tparent == NULL) - return cl; - - if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) { - cl->delayed = 0; - return cl; - } - - do { - /* It is very suspicious place. Now overlimit - * action is generated for not bounded classes - * only if link is completely congested. - * Though it is in agree with ancestor-only paradigm, - * it looks very stupid. Particularly, - * it means that this chunk of code will either - * never be called or result in strong amplification - * of burstiness. Dangerous, silly, and, however, - * no another solution exists. - */ - cl = cl->borrow; - if (!cl) { - this_cl->qstats.overlimits++; - cbq_overlimit(this_cl); - return NULL; - } - if (cl->level > q->toplevel) - return NULL; - } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime); - - cl->delayed = 0; - return cl; -} - -static inline struct sk_buff * -cbq_dequeue_prio(struct Qdisc *sch, int prio) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl_tail, *cl_prev, *cl; - struct sk_buff *skb; - int deficit; - - cl_tail = cl_prev = q->active[prio]; - cl = cl_prev->next_alive; - - do { - deficit = 0; - - /* Start round */ - do { - struct cbq_class *borrow = cl; - - if (cl->q->q.qlen && - (borrow = cbq_under_limit(cl)) == NULL) - goto skip_class; - - if (cl->deficit <= 0) { - /* Class exhausted its allotment per - * this round. Switch to the next one. - */ - deficit = 1; - cl->deficit += cl->quantum; - goto next_class; - } - - skb = cl->q->dequeue(cl->q); - - /* Class did not give us any skb :-( - * It could occur even if cl->q->q.qlen != 0 - * f.e. if cl->q == "tbf" - */ - if (skb == NULL) - goto skip_class; - - cl->deficit -= qdisc_pkt_len(skb); - q->tx_class = cl; - q->tx_borrowed = borrow; - if (borrow != cl) { -#ifndef CBQ_XSTATS_BORROWS_BYTES - borrow->xstats.borrows++; - cl->xstats.borrows++; -#else - borrow->xstats.borrows += qdisc_pkt_len(skb); - cl->xstats.borrows += qdisc_pkt_len(skb); -#endif - } - q->tx_len = qdisc_pkt_len(skb); - - if (cl->deficit <= 0) { - q->active[prio] = cl; - cl = cl->next_alive; - cl->deficit += cl->quantum; - } - return skb; - -skip_class: - if (cl->q->q.qlen == 0 || prio != cl->cpriority) { - /* Class is empty or penalized. - * Unlink it from active chain. - */ - cl_prev->next_alive = cl->next_alive; - cl->next_alive = NULL; - - /* Did cl_tail point to it? */ - if (cl == cl_tail) { - /* Repair it! */ - cl_tail = cl_prev; - - /* Was it the last class in this band? */ - if (cl == cl_tail) { - /* Kill the band! */ - q->active[prio] = NULL; - q->activemask &= ~(1<q->q.qlen) - cbq_activate_class(cl); - return NULL; - } - - q->active[prio] = cl_tail; - } - if (cl->q->q.qlen) - cbq_activate_class(cl); - - cl = cl_prev; - } - -next_class: - cl_prev = cl; - cl = cl->next_alive; - } while (cl_prev != cl_tail); - } while (deficit); - - q->active[prio] = cl_prev; - - return NULL; -} - -static inline struct sk_buff * -cbq_dequeue_1(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct sk_buff *skb; - unsigned int activemask; - - activemask = q->activemask & 0xFF; - while (activemask) { - int prio = ffz(~activemask); - activemask &= ~(1<tx_class) - cbq_update(q); - - q->now = now; - - for (;;) { - q->wd_expires = 0; - - skb = cbq_dequeue_1(sch); - if (skb) { - qdisc_bstats_update(sch, skb); - sch->q.qlen--; - return skb; - } - - /* All the classes are overlimit. - * - * It is possible, if: - * - * 1. Scheduler is empty. - * 2. Toplevel cutoff inhibited borrowing. - * 3. Root class is overlimit. - * - * Reset 2d and 3d conditions and retry. - * - * Note, that NS and cbq-2.0 are buggy, peeking - * an arbitrary class is appropriate for ancestor-only - * sharing, but not for toplevel algorithm. - * - * Our version is better, but slower, because it requires - * two passes, but it is unavoidable with top-level sharing. - */ - - if (q->toplevel == TC_CBQ_MAXLEVEL && - q->link.undertime == PSCHED_PASTPERFECT) - break; - - q->toplevel = TC_CBQ_MAXLEVEL; - q->link.undertime = PSCHED_PASTPERFECT; - } - - /* No packets in scheduler or nobody wants to give them to us :-( - * Sigh... start watchdog timer in the last case. - */ - - if (sch->q.qlen) { - qdisc_qstats_overlimit(sch); - if (q->wd_expires) - qdisc_watchdog_schedule(&q->watchdog, - now + q->wd_expires); - } - return NULL; -} - -/* CBQ class maintenance routines */ - -static void cbq_adjust_levels(struct cbq_class *this) -{ - if (this == NULL) - return; - - do { - int level = 0; - struct cbq_class *cl; - - cl = this->children; - if (cl) { - do { - if (cl->level > level) - level = cl->level; - } while ((cl = cl->sibling) != this->children); - } - this->level = level + 1; - } while ((this = this->tparent) != NULL); -} - -static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) -{ - struct cbq_class *cl; - unsigned int h; - - if (q->quanta[prio] == 0) - return; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - /* BUGGGG... Beware! This expression suffer of - * arithmetic overflows! - */ - if (cl->priority == prio) { - cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ - q->quanta[prio]; - } - if (cl->quantum <= 0 || - cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) { - pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n", - cl->common.classid, cl->quantum); - cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; - } - } - } -} - -static void cbq_sync_defmap(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - struct cbq_class *split = cl->split; - unsigned int h; - int i; - - if (split == NULL) - return; - - for (i = 0; i <= TC_PRIO_MAX; i++) { - if (split->defaults[i] == cl && !(cl->defmap & (1<defaults[i] = NULL; - } - - for (i = 0; i <= TC_PRIO_MAX; i++) { - int level = split->level; - - if (split->defaults[i]) - continue; - - for (h = 0; h < q->clhash.hashsize; h++) { - struct cbq_class *c; - - hlist_for_each_entry(c, &q->clhash.hash[h], - common.hnode) { - if (c->split == split && c->level < level && - c->defmap & (1<defaults[i] = c; - level = c->level; - } - } - } - } -} - -static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask) -{ - struct cbq_class *split = NULL; - - if (splitid == 0) { - split = cl->split; - if (!split) - return; - splitid = split->common.classid; - } - - if (split == NULL || split->common.classid != splitid) { - for (split = cl->tparent; split; split = split->tparent) - if (split->common.classid == splitid) - break; - } - - if (split == NULL) - return; - - if (cl->split != split) { - cl->defmap = 0; - cbq_sync_defmap(cl); - cl->split = split; - cl->defmap = def & mask; - } else - cl->defmap = (cl->defmap & ~mask) | (def & mask); - - cbq_sync_defmap(cl); -} - -static void cbq_unlink_class(struct cbq_class *this) -{ - struct cbq_class *cl, **clp; - struct cbq_sched_data *q = qdisc_priv(this->qdisc); - - qdisc_class_hash_remove(&q->clhash, &this->common); - - if (this->tparent) { - clp = &this->sibling; - cl = *clp; - do { - if (cl == this) { - *clp = cl->sibling; - break; - } - clp = &cl->sibling; - } while ((cl = *clp) != this->sibling); - - if (this->tparent->children == this) { - this->tparent->children = this->sibling; - if (this->sibling == this) - this->tparent->children = NULL; - } - } else { - WARN_ON(this->sibling != this); - } -} - -static void cbq_link_class(struct cbq_class *this) -{ - struct cbq_sched_data *q = qdisc_priv(this->qdisc); - struct cbq_class *parent = this->tparent; - - this->sibling = this; - qdisc_class_hash_insert(&q->clhash, &this->common); - - if (parent == NULL) - return; - - if (parent->children == NULL) { - parent->children = this; - } else { - this->sibling = parent->children->sibling; - parent->children->sibling = this; - } -} - -static void -cbq_reset(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - int prio; - unsigned int h; - - q->activemask = 0; - q->pmask = 0; - q->tx_class = NULL; - q->tx_borrowed = NULL; - qdisc_watchdog_cancel(&q->watchdog); - q->toplevel = TC_CBQ_MAXLEVEL; - q->now = psched_get_time(); - - for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++) - q->active[prio] = NULL; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - qdisc_reset(cl->q); - - cl->next_alive = NULL; - cl->undertime = PSCHED_PASTPERFECT; - cl->avgidle = cl->maxidle; - cl->deficit = cl->quantum; - cl->cpriority = cl->priority; - } - } -} - - -static void cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) -{ - if (lss->change & TCF_CBQ_LSS_FLAGS) { - cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; - cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent; - } - if (lss->change & TCF_CBQ_LSS_EWMA) - cl->ewma_log = lss->ewma_log; - if (lss->change & TCF_CBQ_LSS_AVPKT) - cl->avpkt = lss->avpkt; - if (lss->change & TCF_CBQ_LSS_MINIDLE) - cl->minidle = -(long)lss->minidle; - if (lss->change & TCF_CBQ_LSS_MAXIDLE) { - cl->maxidle = lss->maxidle; - cl->avgidle = lss->maxidle; - } - if (lss->change & TCF_CBQ_LSS_OFFTIME) - cl->offtime = lss->offtime; -} - -static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl) -{ - q->nclasses[cl->priority]--; - q->quanta[cl->priority] -= cl->weight; - cbq_normalize_quanta(q, cl->priority); -} - -static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl) -{ - q->nclasses[cl->priority]++; - q->quanta[cl->priority] += cl->weight; - cbq_normalize_quanta(q, cl->priority); -} - -static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - - if (wrr->allot) - cl->allot = wrr->allot; - if (wrr->weight) - cl->weight = wrr->weight; - if (wrr->priority) { - cl->priority = wrr->priority - 1; - cl->cpriority = cl->priority; - if (cl->priority >= cl->priority2) - cl->priority2 = TC_CBQ_MAXPRIO - 1; - } - - cbq_addprio(q, cl); - return 0; -} - -static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt) -{ - cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange); - return 0; -} - -static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = { - [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) }, - [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) }, - [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) }, - [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) }, - [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) }, - [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, - [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) }, -}; - -static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1], - struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - int err; - - if (!opt) { - NL_SET_ERR_MSG(extack, "CBQ options are required for this operation"); - return -EINVAL; - } - - err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, - cbq_policy, extack); - if (err < 0) - return err; - - if (tb[TCA_CBQ_WRROPT]) { - const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]); - - if (wrr->priority > TC_CBQ_MAXPRIO) { - NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO"); - err = -EINVAL; - } - } - return err; -} - -static int cbq_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct nlattr *tb[TCA_CBQ_MAX + 1]; - struct tc_ratespec *r; - int err; - - qdisc_watchdog_init(&q->watchdog, sch); - - err = cbq_opt_parse(tb, opt, extack); - if (err < 0) - return err; - - if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) { - NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete"); - return -EINVAL; - } - - r = nla_data(tb[TCA_CBQ_RATE]); - - q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack); - if (!q->link.R_tab) - return -EINVAL; - - err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack); - if (err) - goto put_rtab; - - err = qdisc_class_hash_init(&q->clhash); - if (err < 0) - goto put_block; - - q->link.sibling = &q->link; - q->link.common.classid = sch->handle; - q->link.qdisc = sch; - q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - sch->handle, NULL); - if (!q->link.q) - q->link.q = &noop_qdisc; - else - qdisc_hash_add(q->link.q, true); - - q->link.priority = TC_CBQ_MAXPRIO - 1; - q->link.priority2 = TC_CBQ_MAXPRIO - 1; - q->link.cpriority = TC_CBQ_MAXPRIO - 1; - q->link.allot = psched_mtu(qdisc_dev(sch)); - q->link.quantum = q->link.allot; - q->link.weight = q->link.R_tab->rate.rate; - - q->link.ewma_log = TC_CBQ_DEF_EWMA; - q->link.avpkt = q->link.allot/2; - q->link.minidle = -0x7FFFFFFF; - - q->toplevel = TC_CBQ_MAXLEVEL; - q->now = psched_get_time(); - - cbq_link_class(&q->link); - - if (tb[TCA_CBQ_LSSOPT]) - cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT])); - - cbq_addprio(q, &q->link); - return 0; - -put_block: - tcf_block_put(q->link.block); - -put_rtab: - qdisc_put_rtab(q->link.R_tab); - return err; -} - -static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - - if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_lssopt opt; - - opt.flags = 0; - if (cl->borrow == NULL) - opt.flags |= TCF_CBQ_LSS_BOUNDED; - if (cl->share == NULL) - opt.flags |= TCF_CBQ_LSS_ISOLATED; - opt.ewma_log = cl->ewma_log; - opt.level = cl->level; - opt.avpkt = cl->avpkt; - opt.maxidle = cl->maxidle; - opt.minidle = (u32)(-cl->minidle); - opt.offtime = cl->offtime; - opt.change = ~0; - if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_wrropt opt; - - memset(&opt, 0, sizeof(opt)); - opt.flags = 0; - opt.allot = cl->allot; - opt.priority = cl->priority + 1; - opt.cpriority = cl->cpriority + 1; - opt.weight = cl->weight; - if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_fopt opt; - - if (cl->split || cl->defmap) { - opt.split = cl->split ? cl->split->common.classid : 0; - opt.defmap = cl->defmap; - opt.defchange = ~0; - if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt)) - goto nla_put_failure; - } - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) -{ - if (cbq_dump_lss(skb, cl) < 0 || - cbq_dump_rate(skb, cl) < 0 || - cbq_dump_wrr(skb, cl) < 0 || - cbq_dump_fopt(skb, cl) < 0) - return -1; - return 0; -} - -static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct nlattr *nest; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - if (cbq_dump_attr(skb, &q->link) < 0) - goto nla_put_failure; - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static int -cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - q->link.xstats.avgidle = q->link.avgidle; - return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats)); -} - -static int -cbq_dump_class(struct Qdisc *sch, unsigned long arg, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - struct nlattr *nest; - - if (cl->tparent) - tcm->tcm_parent = cl->tparent->common.classid; - else - tcm->tcm_parent = TC_H_ROOT; - tcm->tcm_handle = cl->common.classid; - tcm->tcm_info = cl->q->handle; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - if (cbq_dump_attr(skb, cl) < 0) - goto nla_put_failure; - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static int -cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - __u32 qlen; - - cl->xstats.avgidle = cl->avgidle; - cl->xstats.undertime = 0; - qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog); - - if (cl->undertime != PSCHED_PASTPERFECT) - cl->xstats.undertime = cl->undertime - q->now; - - if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) - return -1; - - return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); -} - -static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old, struct netlink_ext_ack *extack) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->common.classid, extack); - if (new == NULL) - return -ENOBUFS; - } - - *old = qdisc_replace(sch, new, &cl->q); - return 0; -} - -static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - return cl->q; -} - -static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - cbq_deactivate_class(cl); -} - -static unsigned long cbq_find(struct Qdisc *sch, u32 classid) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - return (unsigned long)cbq_class_lookup(q, classid); -} - -static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - WARN_ON(cl->filters); - - tcf_block_put(cl->block); - qdisc_put(cl->q); - qdisc_put_rtab(cl->R_tab); - gen_kill_estimator(&cl->rate_est); - if (cl != &q->link) - kfree(cl); -} - -static void cbq_destroy(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct hlist_node *next; - struct cbq_class *cl; - unsigned int h; - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = NULL; -#endif - /* - * Filters must be destroyed first because we don't destroy the - * classes from root to leafs which means that filters can still - * be bound to classes which have been destroyed already. --TGR '04 - */ - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - tcf_block_put(cl->block); - cl->block = NULL; - } - } - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], - common.hnode) - cbq_destroy_class(sch, cl); - } - qdisc_class_hash_destroy(&q->clhash); -} - -static int -cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, - unsigned long *arg, struct netlink_ext_ack *extack) -{ - int err; - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)*arg; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_CBQ_MAX + 1]; - struct cbq_class *parent; - struct qdisc_rate_table *rtab = NULL; - - err = cbq_opt_parse(tb, opt, extack); - if (err < 0) - return err; - - if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) { - NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params"); - return -EOPNOTSUPP; - } - - if (cl) { - /* Check parent */ - if (parentid) { - if (cl->tparent && - cl->tparent->common.classid != parentid) { - NL_SET_ERR_MSG(extack, "Invalid parent id"); - return -EINVAL; - } - if (!cl->tparent && parentid != TC_H_ROOT) { - NL_SET_ERR_MSG(extack, "Parent must be root"); - return -EINVAL; - } - } - - if (tb[TCA_CBQ_RATE]) { - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), - tb[TCA_CBQ_RTAB], extack); - if (rtab == NULL) - return -EINVAL; - } - - if (tca[TCA_RATE]) { - err = gen_replace_estimator(&cl->bstats, NULL, - &cl->rate_est, - NULL, - true, - tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator"); - qdisc_put_rtab(rtab); - return err; - } - } - - /* Change class parameters */ - sch_tree_lock(sch); - - if (cl->next_alive != NULL) - cbq_deactivate_class(cl); - - if (rtab) { - qdisc_put_rtab(cl->R_tab); - cl->R_tab = rtab; - } - - if (tb[TCA_CBQ_LSSOPT]) - cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); - - if (tb[TCA_CBQ_WRROPT]) { - cbq_rmprio(q, cl); - cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); - } - - if (tb[TCA_CBQ_FOPT]) - cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); - - if (cl->q->q.qlen) - cbq_activate_class(cl); - - sch_tree_unlock(sch); - - return 0; - } - - if (parentid == TC_H_ROOT) - return -EINVAL; - - if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) { - NL_SET_ERR_MSG(extack, "One of the following attributes MUST be specified: WRR, rate or link sharing"); - return -EINVAL; - } - - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB], - extack); - if (rtab == NULL) - return -EINVAL; - - if (classid) { - err = -EINVAL; - if (TC_H_MAJ(classid ^ sch->handle) || - cbq_class_lookup(q, classid)) { - NL_SET_ERR_MSG(extack, "Specified class not found"); - goto failure; - } - } else { - int i; - classid = TC_H_MAKE(sch->handle, 0x8000); - - for (i = 0; i < 0x8000; i++) { - if (++q->hgenerator >= 0x8000) - q->hgenerator = 1; - if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) - break; - } - err = -ENOSR; - if (i >= 0x8000) { - NL_SET_ERR_MSG(extack, "Unable to generate classid"); - goto failure; - } - classid = classid|q->hgenerator; - } - - parent = &q->link; - if (parentid) { - parent = cbq_class_lookup(q, parentid); - err = -EINVAL; - if (!parent) { - NL_SET_ERR_MSG(extack, "Failed to find parentid"); - goto failure; - } - } - - err = -ENOBUFS; - cl = kzalloc(sizeof(*cl), GFP_KERNEL); - if (cl == NULL) - goto failure; - - gnet_stats_basic_sync_init(&cl->bstats); - err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); - if (err) { - kfree(cl); - goto failure; - } - - if (tca[TCA_RATE]) { - err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, true, tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Couldn't create new estimator"); - tcf_block_put(cl->block); - kfree(cl); - goto failure; - } - } - - cl->R_tab = rtab; - rtab = NULL; - cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, - NULL); - if (!cl->q) - cl->q = &noop_qdisc; - else - qdisc_hash_add(cl->q, true); - - cl->common.classid = classid; - cl->tparent = parent; - cl->qdisc = sch; - cl->allot = parent->allot; - cl->quantum = cl->allot; - cl->weight = cl->R_tab->rate.rate; - - sch_tree_lock(sch); - cbq_link_class(cl); - cl->borrow = cl->tparent; - if (cl->tparent != &q->link) - cl->share = cl->tparent; - cbq_adjust_levels(parent); - cl->minidle = -0x7FFFFFFF; - cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); - cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); - if (cl->ewma_log == 0) - cl->ewma_log = q->link.ewma_log; - if (cl->maxidle == 0) - cl->maxidle = q->link.maxidle; - if (cl->avpkt == 0) - cl->avpkt = q->link.avpkt; - if (tb[TCA_CBQ_FOPT]) - cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); - sch_tree_unlock(sch); - - qdisc_class_hash_grow(sch, &q->clhash); - - *arg = (unsigned long)cl; - return 0; - -failure: - qdisc_put_rtab(rtab); - return err; -} - -static int cbq_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - - if (cl->filters || cl->children || cl == &q->link) - return -EBUSY; - - sch_tree_lock(sch); - - qdisc_purge_queue(cl->q); - - if (cl->next_alive) - cbq_deactivate_class(cl); - - if (q->tx_borrowed == cl) - q->tx_borrowed = q->tx_class; - if (q->tx_class == cl) { - q->tx_class = NULL; - q->tx_borrowed = NULL; - } -#ifdef CONFIG_NET_CLS_ACT - if (q->rx_class == cl) - q->rx_class = NULL; -#endif - - cbq_unlink_class(cl); - cbq_adjust_levels(cl->tparent); - cl->defmap = 0; - cbq_sync_defmap(cl); - - cbq_rmprio(q, cl); - sch_tree_unlock(sch); - - cbq_destroy_class(sch, cl); - return 0; -} - -static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - - if (cl == NULL) - cl = &q->link; - - return cl->block; -} - -static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, - u32 classid) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *p = (struct cbq_class *)parent; - struct cbq_class *cl = cbq_class_lookup(q, classid); - - if (cl) { - if (p && p->level <= cl->level) - return 0; - cl->filters++; - return (unsigned long)cl; - } - return 0; -} - -static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - cl->filters--; -} - -static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - unsigned int h; - - if (arg->stop) - return; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) - return; - } - } -} - -static const struct Qdisc_class_ops cbq_class_ops = { - .graft = cbq_graft, - .leaf = cbq_leaf, - .qlen_notify = cbq_qlen_notify, - .find = cbq_find, - .change = cbq_change_class, - .delete = cbq_delete, - .walk = cbq_walk, - .tcf_block = cbq_tcf_block, - .bind_tcf = cbq_bind_filter, - .unbind_tcf = cbq_unbind_filter, - .dump = cbq_dump_class, - .dump_stats = cbq_dump_class_stats, -}; - -static struct Qdisc_ops cbq_qdisc_ops __read_mostly = { - .next = NULL, - .cl_ops = &cbq_class_ops, - .id = "cbq", - .priv_size = sizeof(struct cbq_sched_data), - .enqueue = cbq_enqueue, - .dequeue = cbq_dequeue, - .peek = qdisc_peek_dequeued, - .init = cbq_init, - .reset = cbq_reset, - .destroy = cbq_destroy, - .change = NULL, - .dump = cbq_dump, - .dump_stats = cbq_dump_stats, - .owner = THIS_MODULE, -}; - -static int __init cbq_module_init(void) -{ - return register_qdisc(&cbq_qdisc_ops); -} -static void __exit cbq_module_exit(void) -{ - unregister_qdisc(&cbq_qdisc_ops); -} -module_init(cbq_module_init) -module_exit(cbq_module_exit) -MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c deleted file mode 100644 index 401ffaf87d62..000000000000 --- a/net/sched/sch_dsmark.c +++ /dev/null @@ -1,518 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* net/sched/sch_dsmark.c - Differentiated Services field marker */ - -/* Written 1998-2000 by Werner Almesberger, EPFL ICA */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * classid class marking - * ------- ----- ------- - * n/a 0 n/a - * x:0 1 use entry [0] - * ... ... ... - * x:y y>0 y+1 use entry [y] - * ... ... ... - * x:indices-1 indices use entry [indices-1] - * ... ... ... - * x:y y+1 use entry [y & (indices-1)] - * ... ... ... - * 0xffff 0x10000 use entry [indices-1] - */ - - -#define NO_DEFAULT_INDEX (1 << 16) - -struct mask_value { - u8 mask; - u8 value; -}; - -struct dsmark_qdisc_data { - struct Qdisc *q; - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - struct mask_value *mv; - u16 indices; - u8 set_tc_index; - u32 default_index; /* index range is 0...0xffff */ -#define DSMARK_EMBEDDED_SZ 16 - struct mask_value embedded[DSMARK_EMBEDDED_SZ]; -}; - -static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) -{ - return index <= p->indices && index > 0; -} - -/* ------------------------- Class/flow operations ------------------------- */ - -static int dsmark_graft(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n", - __func__, sch, p, new, old); - - if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - sch->handle, NULL); - if (new == NULL) - new = &noop_qdisc; - } - - *old = qdisc_replace(sch, new, &p->q); - return 0; -} - -static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - return p->q; -} - -static unsigned long dsmark_find(struct Qdisc *sch, u32 classid) -{ - return TC_H_MIN(classid) + 1; -} - -static unsigned long dsmark_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) -{ - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", - __func__, sch, qdisc_priv(sch), classid); - - return dsmark_find(sch, classid); -} - -static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl) -{ -} - -static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = { - [TCA_DSMARK_INDICES] = { .type = NLA_U16 }, - [TCA_DSMARK_DEFAULT_INDEX] = { .type = NLA_U16 }, - [TCA_DSMARK_SET_TC_INDEX] = { .type = NLA_FLAG }, - [TCA_DSMARK_MASK] = { .type = NLA_U8 }, - [TCA_DSMARK_VALUE] = { .type = NLA_U8 }, -}; - -static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_DSMARK_MAX + 1]; - int err = -EINVAL; - - pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n", - __func__, sch, p, classid, parent, *arg); - - if (!dsmark_valid_index(p, *arg)) { - err = -ENOENT; - goto errout; - } - - if (!opt) - goto errout; - - err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, - dsmark_policy, NULL); - if (err < 0) - goto errout; - - if (tb[TCA_DSMARK_VALUE]) - p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]); - - if (tb[TCA_DSMARK_MASK]) - p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]); - - err = 0; - -errout: - return err; -} - -static int dsmark_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - if (!dsmark_valid_index(p, arg)) - return -EINVAL; - - p->mv[arg - 1].mask = 0xff; - p->mv[arg - 1].value = 0; - - return 0; -} - -static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - int i; - - pr_debug("%s(sch %p,[qdisc %p],walker %p)\n", - __func__, sch, p, walker); - - if (walker->stop) - return; - - for (i = 0; i < p->indices; i++) { - if (p->mv[i].mask == 0xff && !p->mv[i].value) { - walker->count++; - continue; - } - if (!tc_qdisc_stats_dump(sch, i + 1, walker)) - break; - } -} - -static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - return p->block; -} - -/* --------------------------- Qdisc operations ---------------------------- */ - -static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - unsigned int len = qdisc_pkt_len(skb); - struct dsmark_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); - - if (p->set_tc_index) { - int wlen = skb_network_offset(skb); - - switch (skb_protocol(skb, true)) { - case htons(ETH_P_IP): - wlen += sizeof(struct iphdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) - goto drop; - - skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) - & ~INET_ECN_MASK; - break; - - case htons(ETH_P_IPV6): - wlen += sizeof(struct ipv6hdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) - goto drop; - - skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) - & ~INET_ECN_MASK; - break; - default: - skb->tc_index = 0; - break; - } - } - - if (TC_H_MAJ(skb->priority) == sch->handle) - skb->tc_index = TC_H_MIN(skb->priority); - else { - struct tcf_result res; - struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result = tcf_classify(skb, NULL, fl, &res, false); - - pr_debug("result %d class 0x%04x\n", result, res.classid); - - switch (result) { -#ifdef CONFIG_NET_CLS_ACT - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - __qdisc_drop(skb, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - - case TC_ACT_SHOT: - goto drop; -#endif - case TC_ACT_OK: - skb->tc_index = TC_H_MIN(res.classid); - break; - - default: - if (p->default_index != NO_DEFAULT_INDEX) - skb->tc_index = p->default_index; - break; - } - } - - err = qdisc_enqueue(skb, p->q, to_free); - if (err != NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(err)) - qdisc_qstats_drop(sch); - return err; - } - - sch->qstats.backlog += len; - sch->q.qlen++; - - return NET_XMIT_SUCCESS; - -drop: - qdisc_drop(skb, sch, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; -} - -static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct sk_buff *skb; - u32 index; - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - skb = qdisc_dequeue_peeked(p->q); - if (skb == NULL) - return NULL; - - qdisc_bstats_update(sch, skb); - qdisc_qstats_backlog_dec(sch, skb); - sch->q.qlen--; - - index = skb->tc_index & (p->indices - 1); - pr_debug("index %d->%d\n", skb->tc_index, index); - - switch (skb_protocol(skb, true)) { - case htons(ETH_P_IP): - ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, - p->mv[index].value); - break; - case htons(ETH_P_IPV6): - ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask, - p->mv[index].value); - break; - default: - /* - * Only complain if a change was actually attempted. - * This way, we can send non-IP traffic through dsmark - * and don't need yet another qdisc as a bypass. - */ - if (p->mv[index].mask != 0xff || p->mv[index].value) - pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(skb_protocol(skb, true))); - break; - } - - return skb; -} - -static struct sk_buff *dsmark_peek(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - return p->q->ops->peek(p->q); -} - -static int dsmark_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *tb[TCA_DSMARK_MAX + 1]; - int err = -EINVAL; - u32 default_index = NO_DEFAULT_INDEX; - u16 indices; - int i; - - pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt); - - if (!opt) - goto errout; - - err = tcf_block_get(&p->block, &p->filter_list, sch, extack); - if (err) - return err; - - err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, - dsmark_policy, NULL); - if (err < 0) - goto errout; - - err = -EINVAL; - if (!tb[TCA_DSMARK_INDICES]) - goto errout; - indices = nla_get_u16(tb[TCA_DSMARK_INDICES]); - - if (hweight32(indices) != 1) - goto errout; - - if (tb[TCA_DSMARK_DEFAULT_INDEX]) - default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]); - - if (indices <= DSMARK_EMBEDDED_SZ) - p->mv = p->embedded; - else - p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL); - if (!p->mv) { - err = -ENOMEM; - goto errout; - } - for (i = 0; i < indices; i++) { - p->mv[i].mask = 0xff; - p->mv[i].value = 0; - } - p->indices = indices; - p->default_index = default_index; - p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); - - p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, - NULL); - if (p->q == NULL) - p->q = &noop_qdisc; - else - qdisc_hash_add(p->q, true); - - pr_debug("%s: qdisc %p\n", __func__, p->q); - - err = 0; -errout: - return err; -} - -static void dsmark_reset(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - if (p->q) - qdisc_reset(p->q); -} - -static void dsmark_destroy(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - tcf_block_put(p->block); - qdisc_put(p->q); - if (p->mv != p->embedded) - kfree(p->mv); -} - -static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opts = NULL; - - pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl); - - if (!dsmark_valid_index(p, cl)) - return -EINVAL; - - tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1); - tcm->tcm_info = p->q->handle; - - opts = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (opts == NULL) - goto nla_put_failure; - if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) || - nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value)) - goto nla_put_failure; - - return nla_nest_end(skb, opts); - -nla_put_failure: - nla_nest_cancel(skb, opts); - return -EMSGSIZE; -} - -static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opts = NULL; - - opts = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (opts == NULL) - goto nla_put_failure; - if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices)) - goto nla_put_failure; - - if (p->default_index != NO_DEFAULT_INDEX && - nla_put_u16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index)) - goto nla_put_failure; - - if (p->set_tc_index && - nla_put_flag(skb, TCA_DSMARK_SET_TC_INDEX)) - goto nla_put_failure; - - return nla_nest_end(skb, opts); - -nla_put_failure: - nla_nest_cancel(skb, opts); - return -EMSGSIZE; -} - -static const struct Qdisc_class_ops dsmark_class_ops = { - .graft = dsmark_graft, - .leaf = dsmark_leaf, - .find = dsmark_find, - .change = dsmark_change, - .delete = dsmark_delete, - .walk = dsmark_walk, - .tcf_block = dsmark_tcf_block, - .bind_tcf = dsmark_bind_filter, - .unbind_tcf = dsmark_unbind_filter, - .dump = dsmark_dump_class, -}; - -static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = { - .next = NULL, - .cl_ops = &dsmark_class_ops, - .id = "dsmark", - .priv_size = sizeof(struct dsmark_qdisc_data), - .enqueue = dsmark_enqueue, - .dequeue = dsmark_dequeue, - .peek = dsmark_peek, - .init = dsmark_init, - .reset = dsmark_reset, - .destroy = dsmark_destroy, - .change = NULL, - .dump = dsmark_dump, - .owner = THIS_MODULE, -}; - -static int __init dsmark_module_init(void) -{ - return register_qdisc(&dsmark_qdisc_ops); -} - -static void __exit dsmark_module_exit(void) -{ - unregister_qdisc(&dsmark_qdisc_ops); -} - -module_init(dsmark_module_init) -module_exit(dsmark_module_exit) - -MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 8d5eebb2dd1b..1d4638aa4254 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -765,7 +765,8 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { - [TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 }, + [TCA_TAPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32, + TC_QOPT_MAX_QUEUE), [TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 }, }; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 9b8999e2afca..867df4522815 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -155,10 +155,12 @@ static int smc_clc_ueid_remove(char *ueid) rc = 0; } } +#if IS_ENABLED(CONFIG_S390) if (!rc && !smc_clc_eid_table.ueid_cnt) { smc_clc_eid_table.seid_enabled = 1; rc = -EAGAIN; /* indicate success and enabling of seid */ } +#endif write_unlock(&smc_clc_eid_table.lock); return rc; } @@ -273,22 +275,30 @@ err: int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info) { +#if IS_ENABLED(CONFIG_S390) write_lock(&smc_clc_eid_table.lock); smc_clc_eid_table.seid_enabled = 1; write_unlock(&smc_clc_eid_table.lock); return 0; +#else + return -EOPNOTSUPP; +#endif } int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info) { int rc = 0; +#if IS_ENABLED(CONFIG_S390) write_lock(&smc_clc_eid_table.lock); if (!smc_clc_eid_table.ueid_cnt) rc = -ENOENT; else smc_clc_eid_table.seid_enabled = 0; write_unlock(&smc_clc_eid_table.lock); +#else + rc = -EOPNOTSUPP; +#endif return rc; } @@ -1168,7 +1178,11 @@ void __init smc_clc_init(void) INIT_LIST_HEAD(&smc_clc_eid_table.list); rwlock_init(&smc_clc_eid_table.lock); smc_clc_eid_table.ueid_cnt = 0; +#if IS_ENABLED(CONFIG_S390) smc_clc_eid_table.seid_enabled = 1; +#else + smc_clc_eid_table.seid_enabled = 0; +#endif } void smc_clc_exit(void) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 801044e7d194..7a907186a33a 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -163,7 +163,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, } if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && - !list_empty(&smc->conn.lgr->list)) { + !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) { struct smc_connection *conn = &smc->conn; struct smcd_diag_dmbinfo dinfo; diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index d435bffc6199..97ff11973c49 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -284,10 +284,10 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) } if (snprintf(portbuf, sizeof(portbuf), - ".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf)) + ".%u.%u", port >> 8, port & 0xff) >= (int)sizeof(portbuf)) return NULL; - if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf)) + if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) >= sizeof(addrbuf)) return NULL; return kstrdup(addrbuf, gfp_flags); diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index d79f12c2550a..cb32ab9a8395 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -250,8 +250,8 @@ static int gssx_dec_option_array(struct xdr_stream *xdr, creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL); if (!creds) { - kfree(oa->data); - return -ENOMEM; + err = -ENOMEM; + goto free_oa; } oa->data[0].option.data = CREDS_VALUE; @@ -265,29 +265,40 @@ static int gssx_dec_option_array(struct xdr_stream *xdr, /* option buffer */ p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - return -ENOSPC; + if (unlikely(p == NULL)) { + err = -ENOSPC; + goto free_creds; + } length = be32_to_cpup(p); p = xdr_inline_decode(xdr, length); - if (unlikely(p == NULL)) - return -ENOSPC; + if (unlikely(p == NULL)) { + err = -ENOSPC; + goto free_creds; + } if (length == sizeof(CREDS_VALUE) && memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) { /* We have creds here. parse them */ err = gssx_dec_linux_creds(xdr, creds); if (err) - return err; + goto free_creds; oa->data[0].value.len = 1; /* presence */ } else { /* consume uninteresting buffer */ err = gssx_dec_buffer(xdr, &dummy); if (err) - return err; + goto free_creds; } } return 0; + +free_creds: + kfree(creds); +free_oa: + kfree(oa->data); + oa->data = NULL; + return err; } static int gssx_dec_status(struct xdr_stream *xdr, diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 74ee2271251e..720d3ba742ec 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -336,8 +336,9 @@ struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi) xprt_switch_find_current_entry_offline); } -bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, - const struct sockaddr *sap) +static +bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) { struct list_head *head; struct rpc_xprt *pos; @@ -356,6 +357,18 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, return false; } +bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) +{ + bool res; + + rcu_read_lock(); + res = __rpc_xprt_switch_has_addr(xps, sap); + rcu_read_unlock(); + + return res; +} + static struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, const struct rpc_xprt *cur, bool check_active) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 8cc42aea19c7..2e14d4c37e2d 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -19,6 +19,35 @@ #include #include +static bool switchdev_obj_eq(const struct switchdev_obj *a, + const struct switchdev_obj *b) +{ + const struct switchdev_obj_port_vlan *va, *vb; + const struct switchdev_obj_port_mdb *ma, *mb; + + if (a->id != b->id || a->orig_dev != b->orig_dev) + return false; + + switch (a->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + va = SWITCHDEV_OBJ_PORT_VLAN(a); + vb = SWITCHDEV_OBJ_PORT_VLAN(b); + return va->flags == vb->flags && + va->vid == vb->vid && + va->changed == vb->changed; + case SWITCHDEV_OBJ_ID_PORT_MDB: + case SWITCHDEV_OBJ_ID_HOST_MDB: + ma = SWITCHDEV_OBJ_PORT_MDB(a); + mb = SWITCHDEV_OBJ_PORT_MDB(b); + return ma->vid == mb->vid && + ether_addr_equal(ma->addr, mb->addr); + default: + break; + } + + BUG(); +} + static LIST_HEAD(deferred); static DEFINE_SPINLOCK(deferred_lock); @@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_obj_del); +/** + * switchdev_port_obj_act_is_deferred - Is object action pending? + * + * @dev: port device + * @nt: type of action; add or delete + * @obj: object to test + * + * Returns true if a deferred item is pending, which is + * equivalent to the action @nt on an object @obj. + * + * rtnl_lock must be held. + */ +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj) +{ + struct switchdev_deferred_item *dfitem; + bool found = false; + + ASSERT_RTNL(); + + spin_lock_bh(&deferred_lock); + + list_for_each_entry(dfitem, &deferred, list) { + if (dfitem->dev != dev) + continue; + + if ((dfitem->func == switchdev_port_obj_add_deferred && + nt == SWITCHDEV_PORT_OBJ_ADD) || + (dfitem->func == switchdev_port_obj_del_deferred && + nt == SWITCHDEV_PORT_OBJ_DEL)) { + if (switchdev_obj_eq((const void *)dfitem->data, obj)) { + found = true; + break; + } + } + } + + spin_unlock_bh(&deferred_lock); + + return found; +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); + static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index cdcd2731860b..1cb993562088 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1088,6 +1088,12 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_TIPC_MEDIA_UDP if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { + rtnl_unlock(); + NL_SET_ERR_MSG(info->extack, "UDP option is unsupported"); + return -EINVAL; + } + err = tipc_udp_nl_bearer_add(b, attrs[TIPC_NLA_BEARER_UDP_OPTS]); if (err) { diff --git a/net/tls/tls.h b/net/tls/tls.h index 0672acab2773..4922668fefaa 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -97,6 +97,7 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +void tls_sw_splice_eof(struct socket *sock); int tls_sw_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags); int tls_sw_sendpage(struct sock *sk, struct page *page, diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 338a443fa47b..6b7189a520af 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -922,6 +922,7 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG] ops[TLS_BASE][TLS_BASE] = *base; ops[TLS_SW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; + ops[TLS_SW ][TLS_BASE].splice_eof = tls_sw_splice_eof; ops[TLS_SW ][TLS_BASE].sendpage_locked = tls_sw_sendpage_locked; ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE]; @@ -990,6 +991,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg; + prot[TLS_SW][TLS_BASE].splice_eof = tls_sw_splice_eof; prot[TLS_SW][TLS_BASE].sendpage = tls_sw_sendpage; prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE]; @@ -1096,7 +1098,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(const struct sock *sk, struct sk_buff *skb) +static int tls_get_info(struct sock *sk, struct sk_buff *skb) { u16 version, cipher_type; struct tls_context *ctx; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 0323040d34bc..bdb5153f3788 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -51,6 +51,7 @@ struct tls_decrypt_arg { struct_group(inargs, bool zc; bool async; + bool async_done; u8 tail; ); @@ -62,6 +63,7 @@ struct tls_decrypt_ctx { u8 iv[MAX_IV_SIZE]; u8 aad[TLS_MAX_AAD_SIZE]; u8 tail; + bool free_sgout; struct scatterlist sg[]; }; @@ -186,7 +188,6 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) struct aead_request *aead_req = crypto_get_completion_data(data); struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); struct scatterlist *sgout = aead_req->dst; - struct scatterlist *sgin = aead_req->src; struct tls_sw_context_rx *ctx; struct tls_decrypt_ctx *dctx; struct tls_context *tls_ctx; @@ -195,6 +196,17 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) struct sock *sk; int aead_size; + /* If requests get too backlogged crypto API returns -EBUSY and calls + * ->complete(-EINPROGRESS) immediately followed by ->complete(0) + * to make waiting for backlog to flush with crypto_wait_req() easier. + * First wait converts -EBUSY -> -EINPROGRESS, and the second one + * -EINPROGRESS -> 0. + * We have a single struct crypto_async_request per direction, this + * scheme doesn't help us, so just ignore the first ->complete(). + */ + if (err == -EINPROGRESS) + return; + aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead); aead_size = ALIGN(aead_size, __alignof__(*dctx)); dctx = (void *)((u8 *)aead_req + aead_size); @@ -212,7 +224,7 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) } /* Free the destination pages if skb was not decrypted inplace */ - if (sgout != sgin) { + if (dctx->free_sgout) { /* Skip the first S/G entry as it points to AAD */ for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { if (!sg) @@ -223,10 +235,17 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) kfree(aead_req); - spin_lock_bh(&ctx->decrypt_compl_lock); - if (!atomic_dec_return(&ctx->decrypt_pending)) + if (atomic_dec_and_test(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->decrypt_compl_lock); +} + +static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) +{ + if (!atomic_dec_and_test(&ctx->decrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->decrypt_pending); + + return ctx->async_wait.err; } static int tls_do_decryption(struct sock *sk, @@ -252,6 +271,7 @@ static int tls_do_decryption(struct sock *sk, aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_decrypt_done, aead_req); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { aead_request_set_callback(aead_req, @@ -260,12 +280,19 @@ static int tls_do_decryption(struct sock *sk, } ret = crypto_aead_decrypt(aead_req); - if (ret == -EINPROGRESS) { - if (darg->async) - return 0; + if (ret == -EINPROGRESS) + return 0; - ret = crypto_wait_req(ret, &ctx->async_wait); + if (ret == -EBUSY) { + ret = tls_decrypt_async_wait(ctx); + darg->async_done = true; + /* all completions have run, we're not doing async anymore */ + darg->async = false; + return ret; + ret = ret ?: -EINPROGRESS; } + + atomic_dec(&ctx->decrypt_pending); darg->async = false; return ret; @@ -439,9 +466,10 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) struct scatterlist *sge; struct sk_msg *msg_en; struct tls_rec *rec; - bool ready = false; struct sock *sk; - int pending; + + if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */ + return; rec = container_of(aead_req, struct tls_rec, aead_req); msg_en = &rec->msg_encrypted; @@ -477,23 +505,25 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) /* If received record is at head of tx_list, schedule tx */ first_rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); - if (rec == first_rec) - ready = true; + if (rec == first_rec) { + /* Schedule the transmission */ + if (!test_and_set_bit(BIT_TX_SCHEDULED, + &ctx->tx_bitmask)) + schedule_delayed_work(&ctx->tx_work.work, 1); + } } - spin_lock_bh(&ctx->encrypt_compl_lock); - pending = atomic_dec_return(&ctx->encrypt_pending); - - if (!pending && ctx->async_notify) + if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->encrypt_compl_lock); +} - if (!ready) - return; +static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) +{ + if (!atomic_dec_and_test(&ctx->encrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->encrypt_pending); - /* Schedule the transmission */ - if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) - schedule_delayed_work(&ctx->tx_work.work, 1); + return ctx->async_wait.err; } static int tls_do_encryption(struct sock *sk, @@ -542,9 +572,14 @@ static int tls_do_encryption(struct sock *sk, /* Add the record in tx_list */ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1); atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); + if (rc == -EBUSY) { + rc = tls_encrypt_async_wait(ctx); + rc = rc ?: -EINPROGRESS; + } if (!rc || rc != -EINPROGRESS) { atomic_dec(&ctx->encrypt_pending); sge->offset -= prot->prepend_size; @@ -953,7 +988,6 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int num_zc = 0; int orig_size; int ret = 0; - int pending; if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_CMSG_COMPAT)) @@ -1122,24 +1156,12 @@ trim_sgl: if (!num_async) { goto send_end; } else if (num_zc) { + int err; + /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); - - if (ctx->async_wait.err) { - ret = ctx->async_wait.err; + err = tls_encrypt_async_wait(ctx); + if (err) { + ret = err; copied = 0; } } @@ -1158,6 +1180,67 @@ send_end: return copied > 0 ? copied : ret; } +/* + * Handle unexpected EOF during splice without SPLICE_F_MORE set. + */ +void tls_sw_splice_eof(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + struct tls_rec *rec; + struct sk_msg *msg_pl; + ssize_t copied = 0; + bool retrying = false; + int ret = 0; + + if (!ctx->open_rec) + return; + + mutex_lock(&tls_ctx->tx_lock); + lock_sock(sk); + +retry: + /* same checks as in tls_sw_push_pending_record() */ + rec = ctx->open_rec; + if (!rec) + goto unlock; + + msg_pl = &rec->msg_plaintext; + if (msg_pl->sg.size == 0) + goto unlock; + + /* Check the BPF advisor and perform transmission. */ + ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA, + &copied, 0); + switch (ret) { + case 0: + case -EAGAIN: + if (retrying) + goto unlock; + retrying = true; + goto retry; + case -EINPROGRESS: + break; + default: + goto unlock; + } + + /* Wait for pending encryptions to get completed */ + if (tls_encrypt_async_wait(ctx)) + goto unlock; + + /* Transmit if any encryptions have completed */ + if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + cancel_delayed_work(&ctx->tx_work.work); + tls_tx_records(sk, 0); + } + +unlock: + release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); +} + static int tls_sw_do_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -1595,12 +1678,16 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, } else if (out_sg) { memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); } + dctx->free_sgout = !!pages; /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, data_len + prot->tail_size, aead_req, darg); - if (err) + if (err) { + if (darg->async_done) + goto exit_free_skb; goto exit_free_pages; + } darg->skb = clear_skb ?: tls_strp_msg(ctx); clear_skb = NULL; @@ -1612,6 +1699,9 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, return err; } + if (unlikely(darg->async_done)) + return 0; + if (prot->tail_size) darg->tail = dctx->tail; @@ -1783,7 +1873,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, u8 *control, size_t skip, size_t len, - bool is_peek) + bool is_peek, + bool *more) { struct sk_buff *skb = skb_peek(&ctx->rx_list); struct tls_msg *tlm; @@ -1796,7 +1887,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; if (skip < rxm->full_len) break; @@ -1814,12 +1905,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - goto out; + goto more; len = len - chunk; copied = copied + chunk; @@ -1855,6 +1946,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, out: return copied ? : err; +more: + if (more) + *more = true; + goto out; } static bool @@ -1954,10 +2049,12 @@ int tls_sw_recvmsg(struct sock *sk, struct strp_msg *rxm; struct tls_msg *tlm; ssize_t copied = 0; + ssize_t peeked = 0; bool async = false; int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool rx_more = false; bool released = true; bool bpf_strp_enabled; bool zc_capable; @@ -1977,12 +2074,12 @@ int tls_sw_recvmsg(struct sock *sk, goto end; /* Process pending decrypted records. It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, 0, len, is_peek); + err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); if (err < 0) goto end; copied = err; - if (len <= copied) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -2075,6 +2172,8 @@ put_on_rx_list: decrypted += chunk; len -= chunk; __skb_queue_tail(&ctx->rx_list, skb); + if (unlikely(control != TLS_RECORD_TYPE_DATA)) + break; continue; } @@ -2098,8 +2197,10 @@ put_on_rx_list: if (err < 0) goto put_on_rx_list_err; - if (is_peek) + if (is_peek) { + peeked += chunk; goto put_on_rx_list; + } if (partially_consumed) { rxm->offset += chunk; @@ -2123,16 +2224,10 @@ put_on_rx_list: recv_end: if (async) { - int ret, pending; + int ret; /* Wait for all previously submitted records to be decrypted */ - spin_lock_bh(&ctx->decrypt_compl_lock); - reinit_completion(&ctx->async_wait.completion); - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - ret = 0; - if (pending) - ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + ret = tls_decrypt_async_wait(ctx); __skb_queue_purge(&ctx->async_hold); if (ret) { @@ -2144,12 +2239,11 @@ recv_end: /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) - err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek); + err = process_rx_list(ctx, msg, &control, copied + peeked, + decrypted - peeked, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, - async_copy_bytes, is_peek); - decrypted += max(err, 0); + async_copy_bytes, is_peek, NULL); } copied += decrypted; @@ -2351,16 +2445,9 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; - int pending; /* Wait for any pending async encryptions to complete */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + tls_encrypt_async_wait(ctx); tls_tx_records(sk, -1); @@ -2513,6 +2600,48 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx) tls_ctx->prot_info.version != TLS_1_3_VERSION; } +static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk) +{ + struct tls_sw_context_tx *sw_ctx_tx; + + if (!ctx->priv_ctx_tx) { + sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); + if (!sw_ctx_tx) + return NULL; + } else { + sw_ctx_tx = ctx->priv_ctx_tx; + } + + crypto_init_wait(&sw_ctx_tx->async_wait); + atomic_set(&sw_ctx_tx->encrypt_pending, 1); + INIT_LIST_HEAD(&sw_ctx_tx->tx_list); + INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); + sw_ctx_tx->tx_work.sk = sk; + + return sw_ctx_tx; +} + +static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) +{ + struct tls_sw_context_rx *sw_ctx_rx; + + if (!ctx->priv_ctx_rx) { + sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); + if (!sw_ctx_rx) + return NULL; + } else { + sw_ctx_rx = ctx->priv_ctx_rx; + } + + crypto_init_wait(&sw_ctx_rx->async_wait); + atomic_set(&sw_ctx_rx->decrypt_pending, 1); + init_waitqueue_head(&sw_ctx_rx->wq); + skb_queue_head_init(&sw_ctx_rx->rx_list); + skb_queue_head_init(&sw_ctx_rx->async_hold); + + return sw_ctx_rx; +} + int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2534,48 +2663,22 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) } if (tx) { - if (!ctx->priv_ctx_tx) { - sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); - if (!sw_ctx_tx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_tx = sw_ctx_tx; - } else { - sw_ctx_tx = - (struct tls_sw_context_tx *)ctx->priv_ctx_tx; - } - } else { - if (!ctx->priv_ctx_rx) { - sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); - if (!sw_ctx_rx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_rx = sw_ctx_rx; - } else { - sw_ctx_rx = - (struct tls_sw_context_rx *)ctx->priv_ctx_rx; - } - } + ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); + if (!ctx->priv_ctx_tx) + return -ENOMEM; - if (tx) { - crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + sw_ctx_tx = ctx->priv_ctx_tx; crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; - INIT_LIST_HEAD(&sw_ctx_tx->tx_list); - INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); - sw_ctx_tx->tx_work.sk = sk; } else { - crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); - init_waitqueue_head(&sw_ctx_rx->wq); + ctx->priv_ctx_rx = init_ctx_rx(ctx); + if (!ctx->priv_ctx_rx) + return -ENOMEM; + + sw_ctx_rx = ctx->priv_ctx_rx; crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; - skb_queue_head_init(&sw_ctx_rx->rx_list); - skb_queue_head_init(&sw_ctx_rx->async_hold); aead = &sw_ctx_rx->aead_recv; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index be2ed7b0fe21..e1af94393789 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1322,13 +1322,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) unix_state_lock(sk1); return; } - if (sk1 < sk2) { - unix_state_lock(sk1); - unix_state_lock_nested(sk2); - } else { - unix_state_lock(sk2); - unix_state_lock_nested(sk1); - } + if (sk1 > sk2) + swap(sk1, sk2); + + unix_state_lock(sk1); + unix_state_lock_nested(sk2, U_LOCK_SECOND); } static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) @@ -1559,7 +1557,7 @@ restart: goto out_unlock; } - unix_state_lock_nested(sk); + unix_state_lock_nested(sk, U_LOCK_SECOND); if (sk->sk_state != st) { unix_state_unlock(sk); diff --git a/net/unix/diag.c b/net/unix/diag.c index 616b55c5b890..3438b7af09af 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) * queue lock. With the other's queue locked it's * OK to lock the state. */ - unix_state_lock_nested(req); + unix_state_lock_nested(req, U_LOCK_DIAG); peer = unix_sk(req)->peer; buf[i++] = (peer ? sock_i_ino(peer) : 0); unix_state_unlock(req); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index dc2763540393..9bfffe2a7f02 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -198,7 +198,7 @@ void wait_for_unix_gc(void) if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && !READ_ONCE(gc_in_progress)) unix_gc(); - wait_event(unix_gc_wait, gc_in_progress == false); + wait_event(unix_gc_wait, !READ_ONCE(gc_in_progress)); } /* The external entry point: unix_gc() */ @@ -284,9 +284,17 @@ void unix_gc(void) * which are creating the cycle(s). */ skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) + list_for_each_entry(u, &gc_candidates, link) { scan_children(&u->sk, inc_inflight, &hitlist); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (u->oob_skb) { + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } +#endif + } + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ diff --git a/net/unix/scm.c b/net/unix/scm.c index e8e2a00bb0f5..d1048b4c2baa 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -34,10 +34,8 @@ struct sock *unix_get_socket(struct file *filp) /* PF_UNIX ? */ if (s && sock->ops && sock->ops->family == PF_UNIX) u_sock = s; - } else { - /* Could be an io_uring instance */ - u_sock = io_uring_get_socket(filp); } + return u_sock; } EXPORT_SYMBOL(unix_get_socket); diff --git a/net/wireless/core.c b/net/wireless/core.c index 8809e668ed91..3fcddc8687ed 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1671,6 +1671,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy, unsigned long delay) { if (!delay) { + del_timer(&dwork->timer); wiphy_work_queue(wiphy, &dwork->work); return; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 70fb14b8bab0..1a3bd554e258 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3960,6 +3960,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx++; } + if_start = 0; wp_idx++; } out: @@ -4136,6 +4137,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (ntype != NL80211_IFTYPE_MESH_POINT) return -EINVAL; + if (otype != NL80211_IFTYPE_MESH_POINT) + return -EINVAL; if (netif_running(dev)) return -EBUSY; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index b7e1631b3d80..3ad4c1032c03 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1810,8 +1810,12 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, list_add(&new->hidden_list, &hidden->hidden_list); hidden->refcount++; + + ies = (void *)rcu_access_pointer(new->pub.beacon_ies); rcu_assign_pointer(new->pub.beacon_ies, hidden->pub.beacon_ies); + if (ies) + kfree_rcu(ies, rcu_head); } } else { /* diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 5c7ad301d742..5a8b2ea56564 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -460,12 +460,12 @@ static int x25_getsockopt(struct socket *sock, int level, int optname, if (get_user(len, optlen)) goto out; - len = min_t(unsigned int, len, sizeof(int)); - rc = -EINVAL; if (len < 0) goto out; + len = min_t(unsigned int, len, sizeof(int)); + rc = -EFAULT; if (put_user(len, optlen)) goto out; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ac1a645afa8d..d0320e35accb 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -180,6 +180,8 @@ static int xfrm4_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) int optlen = 0; int err = -EINVAL; + skb->protocol = htons(ETH_P_IP); + if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) { struct ip_beet_phdr *ph; int phlen; @@ -232,8 +234,7 @@ static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) - goto out; + skb->protocol = htons(ETH_P_IP); if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; @@ -270,8 +271,8 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) - goto out; + skb->protocol = htons(ETH_P_IPV6); + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -301,6 +302,8 @@ static int xfrm6_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) int size = sizeof(struct ipv6hdr); int err; + skb->protocol = htons(ETH_P_IPV6); + err = skb_cow_head(skb, size + skb->mac_len); if (err) goto out; @@ -332,22 +335,26 @@ out: */ static int xfrm_inner_mode_encap_remove(struct xfrm_state *x, - const struct xfrm_mode *inner_mode, struct sk_buff *skb) { - switch (inner_mode->encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: - if (inner_mode->family == AF_INET) + switch (x->sel.family) { + case AF_INET: return xfrm4_remove_beet_encap(x, skb); - if (inner_mode->family == AF_INET6) + case AF_INET6: return xfrm6_remove_beet_encap(x, skb); + } break; case XFRM_MODE_TUNNEL: - if (inner_mode->family == AF_INET) + switch (XFRM_MODE_SKB_CB(skb)->protocol) { + case IPPROTO_IPIP: return xfrm4_remove_tunnel_encap(x, skb); - if (inner_mode->family == AF_INET6) + case IPPROTO_IPV6: return xfrm6_remove_tunnel_encap(x, skb); break; + } + return -EINVAL; } WARN_ON_ONCE(1); @@ -356,9 +363,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_mode *inner_mode = &x->inner_mode; - - switch (x->outer_mode.family) { + switch (x->props.family) { case AF_INET: xfrm4_extract_header(skb); break; @@ -370,25 +375,7 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) return -EAFNOSUPPORT; } - if (x->sel.family == AF_UNSPEC) { - inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (!inner_mode) - return -EAFNOSUPPORT; - } - - switch (inner_mode->family) { - case AF_INET: - skb->protocol = htons(ETH_P_IP); - break; - case AF_INET6: - skb->protocol = htons(ETH_P_IPV6); - break; - default: - WARN_ON_ONCE(1); - break; - } - - return xfrm_inner_mode_encap_remove(x, inner_mode, skb); + return xfrm_inner_mode_encap_remove(x, skb); } /* Remove encapsulation header. @@ -434,17 +421,16 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } static int xfrm_inner_mode_input(struct xfrm_state *x, - const struct xfrm_mode *inner_mode, struct sk_buff *skb) { - switch (inner_mode->encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: case XFRM_MODE_TUNNEL: return xfrm_prepare_input(x, skb); case XFRM_MODE_TRANSPORT: - if (inner_mode->family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_transport_input(x, skb); - if (inner_mode->family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_transport_input(x, skb); break; case XFRM_MODE_ROUTEOPTIMIZATION: @@ -462,7 +448,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { const struct xfrm_state_afinfo *afinfo; struct net *net = dev_net(skb->dev); - const struct xfrm_mode *inner_mode; int err; __be32 seq; __be32 seq_hi; @@ -492,7 +477,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - family = x->outer_mode.family; + family = x->props.family; /* An encap_type of -1 indicates async resumption. */ if (encap_type == -1) { @@ -676,17 +661,7 @@ resume: XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; - inner_mode = &x->inner_mode; - - if (x->sel.family == AF_UNSPEC) { - inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (inner_mode == NULL) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); - goto drop; - } - } - - if (xfrm_inner_mode_input(x, inner_mode, skb)) { + if (xfrm_inner_mode_input(x, skb)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } @@ -701,7 +676,7 @@ resume: * transport mode so the outer address is identical. */ daddr = &x->id.daddr; - family = x->outer_mode.family; + family = x->props.family; err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) { @@ -732,7 +707,7 @@ resume: err = -EAFNOSUPPORT; rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->inner_mode.family); + afinfo = xfrm_state_afinfo_get_rcu(x->props.family); if (likely(afinfo)) err = afinfo->transport_finish(skb, xfrm_gro || async); rcu_read_unlock(); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 9a5e79a38c67..07a7ee43b8ae 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -414,7 +414,7 @@ static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; skb->protocol = htons(ETH_P_IP); - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: return xfrm4_beet_encap_add(x, skb); case XFRM_MODE_TUNNEL: @@ -437,7 +437,7 @@ static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) skb->ignore_df = 1; skb->protocol = htons(ETH_P_IPV6); - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: return xfrm6_beet_encap_add(x, skb); case XFRM_MODE_TUNNEL: @@ -453,22 +453,22 @@ static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb) { - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: case XFRM_MODE_TUNNEL: - if (x->outer_mode.family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_prepare_output(x, skb); - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_prepare_output(x, skb); break; case XFRM_MODE_TRANSPORT: - if (x->outer_mode.family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_transport_output(x, skb); - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_transport_output(x, skb); break; case XFRM_MODE_ROUTEOPTIMIZATION: - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_ro_output(x, skb); WARN_ON_ONCE(1); break; @@ -866,21 +866,10 @@ static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_mode *inner_mode; - - if (x->sel.family == AF_UNSPEC) - inner_mode = xfrm_ip2inner_mode(x, - xfrm_af2proto(skb_dst(skb)->ops->family)); - else - inner_mode = &x->inner_mode; - - if (inner_mode == NULL) - return -EAFNOSUPPORT; - - switch (inner_mode->family) { - case AF_INET: + switch (skb->protocol) { + case htons(ETH_P_IP): return xfrm4_extract_output(x, skb); - case AF_INET6: + case htons(ETH_P_IPV6): return xfrm6_extract_output(x, skb); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d042ca01211f..0cc4ed29e901 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1979,6 +1979,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) if (xp->xfrm_nr == 0) return 0; + if (xp->xfrm_nr > XFRM_MAX_DEPTH) + return -ENOBUFS; + for (i = 0; i < xp->xfrm_nr; i++) { struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h index 7048bb3594d6..634e81d83efd 100644 --- a/samples/bpf/asm_goto_workaround.h +++ b/samples/bpf/asm_goto_workaround.h @@ -4,14 +4,14 @@ #define __ASM_GOTO_WORKAROUND_H /* - * This will bring in asm_volatile_goto and asm_inline macro definitions + * This will bring in asm_goto_output and asm_inline macro definitions * if enabled by compiler and config options. */ #include -#ifdef asm_volatile_goto -#undef asm_volatile_goto -#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto") +#ifdef asm_goto_output +#undef asm_goto_output +#define asm_goto_output(x...) asm volatile("invalid use of asm_goto_output") #endif /* diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 6bbba36c5969..fa5ef4180688 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -65,6 +65,8 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) KBUILD_CFLAGS += $(call cc-disable-warning, cast-function-type-strict) +KBUILD_CFLAGS += -Wno-enum-compare-conditional +KBUILD_CFLAGS += -Wno-enum-enum-conversion endif endif diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index e41dee64d429..39aea753d0bd 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -44,6 +44,7 @@ modpost-args = \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ $(if $(KBUILD_NSDEPS),-d $(MODULES_NSDEPS)) \ $(if $(CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS)$(KBUILD_NSDEPS),-N) \ + $(if $(findstring 1, $(KBUILD_EXTRA_WARN)),-W) \ -o $@ # 'make -i -k' ignores compile errors, and builds as many modules as possible. diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index d5c389df6045..4de98b7bbea9 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -495,7 +495,7 @@ eBPF programs can have an associated license, passed along with the bytecode instructions to the kernel when the programs are loaded. The format for that string is identical to the one in use for kernel modules (Dual licenses, such as "Dual BSD/GPL", may be used). Some helper functions are only accessible to -programs that are compatible with the GNU Privacy License (GPL). +programs that are compatible with the GNU General Public License (GNU GPL). In order to use such helpers, the eBPF program must be loaded with the correct license string passed (via **attr**) to the **bpf**\ () system call, and this diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 1e5e66ae5a52..ecf4250b0d2d 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4971,7 +4971,7 @@ sub process { if|for|while|switch|return|case| volatile|__volatile__| __attribute__|format|__extension__| - asm|__asm__)$/x) + asm|__asm__|scoped_guard)$/x) { # cpp #define statements have non-optional spaces, ie # if there is a space between the name and the open diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index d800b2c0af97..4f414ab706bd 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -170,7 +170,7 @@ def process_line(root_directory, command_prefix, file_path): # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the # kernel version). The compile_commands.json file is not interepreted # by Make, so this code replaces the escaped version with '#'. - prefix = command_prefix.replace('\#', '#').replace('$(pound)', '#') + prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#') # Use os.path.abspath() to normalize the path resolving '.' and '..' . abs_path = os.path.abspath(os.path.join(root_directory, file_path)) diff --git a/scripts/get_abi.pl b/scripts/get_abi.pl index 0ffd5531242a..408bfd0216da 100755 --- a/scripts/get_abi.pl +++ b/scripts/get_abi.pl @@ -98,7 +98,7 @@ sub parse_abi { $name =~ s,.*/,,; my $fn = $file; - $fn =~ s,Documentation/ABI/,,; + $fn =~ s,.*Documentation/ABI/,,; my $nametag = "File $fn"; $data{$nametag}->{what} = "File $name"; diff --git a/scripts/kconfig/lexer.l b/scripts/kconfig/lexer.l index cc386e443683..2c2b3e6f248c 100644 --- a/scripts/kconfig/lexer.l +++ b/scripts/kconfig/lexer.l @@ -302,8 +302,11 @@ static char *expand_token(const char *in, size_t n) new_string(); append_string(in, n); - /* get the whole line because we do not know the end of token. */ - while ((c = input()) != EOF) { + /* + * get the whole line because we do not know the end of token. + * input() returns 0 (not EOF!) when it reachs the end of file. + */ + while ((c = input()) != 0) { if (c == '\n') { unput(c); break; diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 32e573943cf0..458b2948b580 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -135,8 +135,13 @@ gen_btf() ${OBJCOPY} --only-section=.BTF --set-section-flags .BTF=alloc,readonly \ --strip-all ${1} ${2} 2>/dev/null # Change e_type to ET_REL so that it can be used to link final vmlinux. - # Unlike GNU ld, lld does not allow an ET_EXEC input. - printf '\1' | dd of=${2} conv=notrunc bs=1 seek=16 status=none + # GNU ld 2.35+ and lld do not allow an ET_EXEC input. + if is_enabled CONFIG_CPU_BIG_ENDIAN; then + et_rel='\0\1' + else + et_rel='\1\0' + fi + printf "${et_rel}" | dd of=${2} conv=notrunc bs=1 seek=16 status=none } # Create ${2} .S file with all symbols from the ${1} object file diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index e6be7fc2625f..686eed37f978 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -41,6 +41,8 @@ static bool allow_missing_ns_imports; static bool error_occurred; +static bool extra_warn; + /* * Cut off the warnings when there are too many. This typically occurs when * vmlinux is missing. ('make modules' without building vmlinux.) @@ -809,7 +811,7 @@ static void check_section(const char *modname, struct elf_info *elf, #define ALL_INIT_TEXT_SECTIONS \ ".init.text", ".meminit.text" #define ALL_EXIT_TEXT_SECTIONS \ - ".exit.text", ".memexit.text" + ".exit.text" #define ALL_PCI_INIT_SECTIONS \ ".pci_fixup_early", ".pci_fixup_header", ".pci_fixup_final", \ @@ -817,23 +819,22 @@ static void check_section(const char *modname, struct elf_info *elf, ".pci_fixup_resume_early", ".pci_fixup_suspend" #define ALL_XXXINIT_SECTIONS MEM_INIT_SECTIONS -#define ALL_XXXEXIT_SECTIONS MEM_EXIT_SECTIONS #define ALL_INIT_SECTIONS INIT_SECTIONS, ALL_XXXINIT_SECTIONS -#define ALL_EXIT_SECTIONS EXIT_SECTIONS, ALL_XXXEXIT_SECTIONS +#define ALL_EXIT_SECTIONS EXIT_SECTIONS #define DATA_SECTIONS ".data", ".data.rel" -#define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ - ".kprobes.text", ".cpuidle.text", ".noinstr.text" +#define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \ + ".kprobes.text", ".cpuidle.text", ".noinstr.text", \ + ".ltext", ".ltext.*" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ - ".fixup", ".entry.text", ".exception.text", ".text.*", \ + ".fixup", ".entry.text", ".exception.text", \ ".coldtext", ".softirqentry.text" #define INIT_SECTIONS ".init.*" #define MEM_INIT_SECTIONS ".meminit.*" #define EXIT_SECTIONS ".exit.*" -#define MEM_EXIT_SECTIONS ".memexit.*" #define ALL_TEXT_SECTIONS ALL_INIT_TEXT_SECTIONS, ALL_EXIT_TEXT_SECTIONS, \ TEXT_SECTIONS, OTHER_TEXT_SECTIONS @@ -862,7 +863,6 @@ enum mismatch { TEXT_TO_ANY_EXIT, DATA_TO_ANY_EXIT, XXXINIT_TO_SOME_INIT, - XXXEXIT_TO_SOME_EXIT, ANY_INIT_TO_ANY_EXIT, ANY_EXIT_TO_ANY_INIT, EXPORT_TO_INIT_EXIT, @@ -937,12 +937,6 @@ static const struct sectioncheck sectioncheck[] = { .bad_tosec = { INIT_SECTIONS, NULL }, .mismatch = XXXINIT_TO_SOME_INIT, }, -/* Do not reference exit code/data from memexit code/data */ -{ - .fromsec = { ALL_XXXEXIT_SECTIONS, NULL }, - .bad_tosec = { EXIT_SECTIONS, NULL }, - .mismatch = XXXEXIT_TO_SOME_EXIT, -}, /* Do not use exit code/data from init code */ { .fromsec = { ALL_INIT_SECTIONS, NULL }, @@ -1085,9 +1079,20 @@ static int secref_whitelist(const struct sectioncheck *mismatch, "*_console"))) return 0; - /* symbols in data sections that may refer to meminit/exit sections */ + /* symbols in data sections that may refer to meminit sections */ if (match(fromsec, PATTERNS(DATA_SECTIONS)) && - match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS, ALL_EXIT_SECTIONS)) && + match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS)) && + match(fromsym, PATTERNS("*driver"))) + return 0; + + /* + * symbols in data sections must not refer to .exit.*, but there are + * quite a few offenders, so hide these unless for W=1 builds until + * these are fixed. + */ + if (!extra_warn && + match(fromsec, PATTERNS(DATA_SECTIONS)) && + match(tosec, PATTERNS(EXIT_SECTIONS)) && match(fromsym, PATTERNS("*driver"))) return 0; @@ -1254,7 +1259,6 @@ static void report_sec_mismatch(const char *modname, case TEXT_TO_ANY_EXIT: case DATA_TO_ANY_EXIT: case XXXINIT_TO_SOME_INIT: - case XXXEXIT_TO_SOME_EXIT: case ANY_INIT_TO_ANY_EXIT: case ANY_EXIT_TO_ANY_INIT: warn("%s: section mismatch in reference: %s (section: %s) -> %s (section: %s)\n", @@ -2290,7 +2294,7 @@ int main(int argc, char **argv) LIST_HEAD(dump_lists); struct dump_list *dl, *dl2; - while ((opt = getopt(argc, argv, "ei:mnT:o:awENd:")) != -1) { + while ((opt = getopt(argc, argv, "ei:mnT:o:aWwENd:")) != -1) { switch (opt) { case 'e': external_module = true; @@ -2315,6 +2319,9 @@ int main(int argc, char **argv) case 'T': files_source = optarg; break; + case 'W': + extra_warn = true; + break; case 'w': warn_unresolved = true; break; diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index 6bf9caca0968..a72e6cf61a1f 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -326,7 +326,12 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md) /* Sum all files in the same dir or subdirs. */ while ((line = get_line(&pos))) { - char* p = line; + char* p; + + /* trim the leading spaces away */ + while (isspace(*line)) + line++; + p = line; if (strncmp(line, "source_", sizeof("source_")-1) == 0) { p = strrchr(line, ' '); diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index f42359f58eb5..d468c8b90298 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -226,7 +226,7 @@ void aa_policy_destroy(struct aa_policy *policy); */ #define fn_label_build(L, P, GFP, FN) \ ({ \ - __label__ __cleanup, __done; \ + __label__ __do_cleanup, __done; \ struct aa_label *__new_; \ \ if ((L)->size > 1) { \ @@ -244,7 +244,7 @@ void aa_policy_destroy(struct aa_policy *policy); __new_ = (FN); \ AA_BUG(!__new_); \ if (IS_ERR(__new_)) \ - goto __cleanup; \ + goto __do_cleanup; \ __lvec[__j++] = __new_; \ } \ for (__j = __count = 0; __j < (L)->size; __j++) \ @@ -266,7 +266,7 @@ void aa_policy_destroy(struct aa_policy *policy); vec_cleanup(profile, __pvec, __count); \ } else \ __new_ = NULL; \ -__cleanup: \ +__do_cleanup: \ vec_cleanup(label, __lvec, (L)->size); \ } else { \ (P) = labels_profile(L); \ diff --git a/security/landlock/fs.c b/security/landlock/fs.c index 64ed7665455f..d328965f32f7 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -824,8 +824,8 @@ static int current_check_refer_path(struct dentry *const old_dentry, bool allow_parent1, allow_parent2; access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; - layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS], - layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS]; + layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, + layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; if (!dom) return 0; diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 2ca0ccbd905a..d0cb3d0cbf98 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -32,6 +32,18 @@ #include "ruleset.h" #include "setup.h" +static bool is_initialized(void) +{ + if (likely(landlock_initialized)) + return true; + + pr_warn_once( + "Disabled but requested by user space. " + "You should enable Landlock at boot time: " + "https://docs.kernel.org/userspace-api/landlock.html#boot-time-configuration\n"); + return false; +} + /** * copy_min_struct_from_user - Safe future-proof argument copying * @@ -165,7 +177,7 @@ SYSCALL_DEFINE3(landlock_create_ruleset, /* Build-time checks. */ build_check_abi(); - if (!landlock_initialized) + if (!is_initialized()) return -EOPNOTSUPP; if (flags) { @@ -311,7 +323,7 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, struct landlock_ruleset *ruleset; int res, err; - if (!landlock_initialized) + if (!is_initialized()) return -EOPNOTSUPP; /* No flag for now. */ @@ -402,7 +414,7 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, struct landlock_cred_security *new_llcred; int err; - if (!landlock_initialized) + if (!is_initialized()) return -EOPNOTSUPP; /* diff --git a/security/security.c b/security/security.c index 5fa286ae9908..1b504c296551 100644 --- a/security/security.c +++ b/security/security.c @@ -1569,6 +1569,24 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } EXPORT_SYMBOL_GPL(security_file_ioctl); +/** + * security_file_ioctl_compat() - Check if an ioctl is allowed in compat mode + * @file: associated file + * @cmd: ioctl cmd + * @arg: ioctl arguments + * + * Compat version of security_file_ioctl() that correctly handles 32-bit + * processes running on 64-bit kernels. + * + * Return: Returns 0 if permission is granted. + */ +int security_file_ioctl_compat(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return call_int_hook(file_ioctl_compat, 0, file, cmd, arg); +} +EXPORT_SYMBOL_GPL(security_file_ioctl_compat); + static inline unsigned long mmap_prot(struct file *file, unsigned long prot) { /* @@ -2168,7 +2186,19 @@ EXPORT_SYMBOL(security_inode_setsecctx); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) { - return call_int_hook(inode_getsecctx, -EOPNOTSUPP, inode, ctx, ctxlen); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.inode_getsecctx, list) { + rc = hp->hook.inode_getsecctx(inode, ctx, ctxlen); + if (rc != LSM_RET_DEFAULT(inode_getsecctx)) + return rc; + } + + return LSM_RET_DEFAULT(inode_getsecctx); } EXPORT_SYMBOL(security_inode_getsecctx); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d45e9fa74e62..78f3da39b031 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3718,6 +3718,33 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd, return error; } +static int selinux_file_ioctl_compat(struct file *file, unsigned int cmd, + unsigned long arg) +{ + /* + * If we are in a 64-bit kernel running 32-bit userspace, we need to + * make sure we don't compare 32-bit flags to 64-bit flags. + */ + switch (cmd) { + case FS_IOC32_GETFLAGS: + cmd = FS_IOC_GETFLAGS; + break; + case FS_IOC32_SETFLAGS: + cmd = FS_IOC_SETFLAGS; + break; + case FS_IOC32_GETVERSION: + cmd = FS_IOC_GETVERSION; + break; + case FS_IOC32_SETVERSION: + cmd = FS_IOC_SETVERSION; + break; + default: + break; + } + + return selinux_file_ioctl(file, cmd, arg); +} + static int default_noexec __ro_after_init; static int file_map_prot_check(struct file *file, unsigned long prot, int shared) @@ -7135,6 +7162,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(file_permission, selinux_file_permission), LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security), LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl), + LSM_HOOK_INIT(file_ioctl_compat, selinux_file_ioctl_compat), LSM_HOOK_INIT(mmap_file, selinux_mmap_file), LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr), LSM_HOOK_INIT(file_mprotect, selinux_file_mprotect), diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index cd6a03e945eb..feba69549d08 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1309,7 +1309,8 @@ static int smack_inode_setxattr(struct user_namespace *mnt_userns, check_star = 1; } else if (strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0) { check_priv = 1; - if (size != TRANS_TRUE_SIZE || + if (!S_ISDIR(d_backing_inode(dentry)->i_mode) || + size != TRANS_TRUE_SIZE || strncmp(value, TRANS_TRUE, TRANS_TRUE_SIZE) != 0) rc = -EINVAL; } else @@ -2782,6 +2783,15 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name, if (value == NULL || size > SMK_LONGLABEL || size == 0) return -EINVAL; + if (strcmp(name, XATTR_SMACK_TRANSMUTE) == 0) { + if (!S_ISDIR(inode->i_mode) || size != TRANS_TRUE_SIZE || + strncmp(value, TRANS_TRUE, TRANS_TRUE_SIZE) != 0) + return -EINVAL; + + nsp->smk_flags |= SMK_INODE_TRANSMUTE; + return 0; + } + skp = smk_import_entry(value, size); if (IS_ERR(skp)) return PTR_ERR(skp); @@ -4905,6 +4915,7 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(file_alloc_security, smack_file_alloc_security), LSM_HOOK_INIT(file_ioctl, smack_file_ioctl), + LSM_HOOK_INIT(file_ioctl_compat, smack_file_ioctl), LSM_HOOK_INIT(file_lock, smack_file_lock), LSM_HOOK_INIT(file_fcntl, smack_file_fcntl), LSM_HOOK_INIT(mmap_file, smack_mmap_file), diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index f4cd9b58b205..a7af085550b2 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -2648,13 +2648,14 @@ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head, { int error = buffer_len; size_t avail_len = buffer_len; - char *cp0 = head->write_buf; + char *cp0; int idx; if (!head->write) return -EINVAL; if (mutex_lock_interruptible(&head->io_sem)) return -EINTR; + cp0 = head->write_buf; head->read_user_buf_avail = 0; idx = tomoyo_read_lock(); /* Read a line and dispatch it to the policy handler. */ diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 71e82d855ebf..fb599401bc7d 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -555,6 +555,7 @@ static struct security_hook_list tomoyo_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(path_rename, tomoyo_path_rename), LSM_HOOK_INIT(inode_getattr, tomoyo_inode_getattr), LSM_HOOK_INIT(file_ioctl, tomoyo_file_ioctl), + LSM_HOOK_INIT(file_ioctl_compat, tomoyo_file_ioctl), LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod), LSM_HOOK_INIT(path_chown, tomoyo_path_chown), LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot), diff --git a/sound/core/Makefile b/sound/core/Makefile index 2762f03d9b7b..a7a1590b2952 100644 --- a/sound/core/Makefile +++ b/sound/core/Makefile @@ -30,7 +30,6 @@ snd-ctl-led-objs := control_led.o snd-rawmidi-objs := rawmidi.o snd-timer-objs := timer.o snd-hrtimer-objs := hrtimer.o -snd-rtctimer-objs := rtctimer.o snd-hwdep-objs := hwdep.o snd-seq-device-objs := seq_device.o diff --git a/sound/core/seq/seq_midi.c b/sound/core/seq/seq_midi.c index 4589aac09154..b00bbf18a6f5 100644 --- a/sound/core/seq/seq_midi.c +++ b/sound/core/seq/seq_midi.c @@ -112,6 +112,12 @@ static int dump_midi(struct snd_rawmidi_substream *substream, const char *buf, i return 0; } +/* callback for snd_seq_dump_var_event(), bridging to dump_midi() */ +static int __dump_midi(void *ptr, void *buf, int count) +{ + return dump_midi(ptr, buf, count); +} + static int event_process_midi(struct snd_seq_event *ev, int direct, void *private_data, int atomic, int hop) { @@ -131,7 +137,7 @@ static int event_process_midi(struct snd_seq_event *ev, int direct, pr_debug("ALSA: seq_midi: invalid sysex event flags = 0x%x\n", ev->flags); return 0; } - snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)dump_midi, substream); + snd_seq_dump_var_event(ev, __dump_midi, substream); snd_midi_event_reset_decode(msynth->parser); } else { if (msynth->parser == NULL) diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index f5cae49500c8..ffd8e7202c33 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -62,6 +62,13 @@ static void snd_virmidi_init_event(struct snd_virmidi *vmidi, /* * decode input event and put to read buffer of each opened file */ + +/* callback for snd_seq_dump_var_event(), bridging to snd_rawmidi_receive() */ +static int dump_to_rawmidi(void *ptr, void *buf, int count) +{ + return snd_rawmidi_receive(ptr, buf, count); +} + static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, struct snd_seq_event *ev, bool atomic) @@ -80,7 +87,7 @@ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, if (ev->type == SNDRV_SEQ_EVENT_SYSEX) { if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) continue; - snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)snd_rawmidi_receive, vmidi->substream); + snd_seq_dump_var_event(ev, dump_to_rawmidi, vmidi->substream); snd_midi_event_reset_decode(vmidi->parser); } else { len = snd_midi_event_decode(vmidi->parser, msg, sizeof(msg), ev); diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 9be2260e4ca2..f8b644cb9157 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -934,7 +934,7 @@ static int generate_device_pkt_descs(struct amdtp_stream *s, // to the reason. unsigned int safe_cycle = increment_ohci_cycle_count(next_cycle, IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES); - lost = (compare_ohci_cycle_count(safe_cycle, cycle) > 0); + lost = (compare_ohci_cycle_count(safe_cycle, cycle) < 0); } if (lost) { dev_err(&s->unit->device, "Detect discontinuity of cycle: %d %d\n", diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c index 741a5d17ae4c..ecd3aec57c87 100644 --- a/sound/hda/hdac_stream.c +++ b/sound/hda/hdac_stream.c @@ -629,17 +629,15 @@ void snd_hdac_stream_timecounter_init(struct hdac_stream *azx_dev, struct hdac_stream *s; bool inited = false; u64 cycle_last = 0; - int i = 0; list_for_each_entry(s, &bus->stream_list, list) { - if (streams & (1 << i)) { + if ((streams & (1 << s->index))) { azx_timecounter_init(s, inited, cycle_last); if (!inited) { inited = true; cycle_last = s->tc.cycle_last; } } - i++; } snd_pcm_gettime(runtime, &runtime->trigger_tstamp); @@ -684,14 +682,13 @@ void snd_hdac_stream_sync(struct hdac_stream *azx_dev, bool start, unsigned int streams) { struct hdac_bus *bus = azx_dev->bus; - int i, nwait, timeout; + int nwait, timeout; struct hdac_stream *s; for (timeout = 5000; timeout; timeout--) { nwait = 0; - i = 0; list_for_each_entry(s, &bus->stream_list, list) { - if (!(streams & (1 << i++))) + if (!(streams & (1 << s->index))) continue; if (start) { diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 5aaf3dcecf27..a26f2a2d44cf 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2549,6 +2549,8 @@ static const struct pci_device_id azx_ids[] = { /* Lunarlake-P */ { PCI_DEVICE(0x8086, 0xa828), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* Arrow Lake */ + { PCI_DEVICE_DATA(INTEL, HDA_ARL, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE) }, /* Broxton-P(Apollolake) */ { PCI_DEVICE(0x8086, 0x5a98), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_BROXTON }, diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index a889cccdd607..e8209178d87b 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -21,6 +21,12 @@ #include "hda_jack.h" #include "hda_generic.h" +enum { + CX_HEADSET_NOPRESENT = 0, + CX_HEADSET_PARTPRESENT, + CX_HEADSET_ALLPRESENT, +}; + struct conexant_spec { struct hda_gen_spec gen; @@ -42,7 +48,8 @@ struct conexant_spec { unsigned int gpio_led; unsigned int gpio_mute_led_mask; unsigned int gpio_mic_led_mask; - + unsigned int headset_present_flag; + bool is_cx8070_sn6140; }; @@ -164,6 +171,27 @@ static void cxt_init_gpio_led(struct hda_codec *codec) } } +static void cx_fixup_headset_recog(struct hda_codec *codec) +{ + unsigned int mic_persent; + + /* fix some headset type recognize fail issue, such as EDIFIER headset */ + /* set micbiasd output current comparator threshold from 66% to 55%. */ + snd_hda_codec_write(codec, 0x1c, 0, 0x320, 0x010); + /* set OFF voltage for DFET from -1.2V to -0.8V, set headset micbias registor + * value adjustment trim from 2.2K ohms to 2.0K ohms. + */ + snd_hda_codec_write(codec, 0x1c, 0, 0x3b0, 0xe10); + /* fix reboot headset type recognize fail issue */ + mic_persent = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_PIN_SENSE, 0x0); + if (mic_persent & AC_PINSENSE_PRESENCE) + /* enable headset mic VREF */ + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24); + else + /* disable headset mic VREF */ + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); +} + static int cx_auto_init(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; @@ -174,6 +202,9 @@ static int cx_auto_init(struct hda_codec *codec) cxt_init_gpio_led(codec); snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_INIT); + if (spec->is_cx8070_sn6140) + cx_fixup_headset_recog(codec); + return 0; } @@ -192,6 +223,77 @@ static void cx_auto_free(struct hda_codec *codec) snd_hda_gen_free(codec); } +static void cx_process_headset_plugin(struct hda_codec *codec) +{ + unsigned int val; + unsigned int count = 0; + + /* Wait headset detect done. */ + do { + val = snd_hda_codec_read(codec, 0x1c, 0, 0xca0, 0x0); + if (val & 0x080) { + codec_dbg(codec, "headset type detect done!\n"); + break; + } + msleep(20); + count++; + } while (count < 3); + val = snd_hda_codec_read(codec, 0x1c, 0, 0xcb0, 0x0); + if (val & 0x800) { + codec_dbg(codec, "headset plugin, type is CTIA\n"); + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24); + } else if (val & 0x400) { + codec_dbg(codec, "headset plugin, type is OMTP\n"); + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24); + } else { + codec_dbg(codec, "headphone plugin\n"); + } +} + +static void cx_update_headset_mic_vref(struct hda_codec *codec, unsigned int res) +{ + unsigned int phone_present, mic_persent, phone_tag, mic_tag; + struct conexant_spec *spec = codec->spec; + + /* In cx8070 and sn6140, the node 16 can only be config to headphone or disabled, + * the node 19 can only be config to microphone or disabled. + * Check hp&mic tag to process headset pulgin&plugout. + */ + phone_tag = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); + mic_tag = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); + if ((phone_tag & (res >> AC_UNSOL_RES_TAG_SHIFT)) || + (mic_tag & (res >> AC_UNSOL_RES_TAG_SHIFT))) { + phone_present = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_PIN_SENSE, 0x0); + if (!(phone_present & AC_PINSENSE_PRESENCE)) {/* headphone plugout */ + spec->headset_present_flag = CX_HEADSET_NOPRESENT; + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); + return; + } + if (spec->headset_present_flag == CX_HEADSET_NOPRESENT) { + spec->headset_present_flag = CX_HEADSET_PARTPRESENT; + } else if (spec->headset_present_flag == CX_HEADSET_PARTPRESENT) { + mic_persent = snd_hda_codec_read(codec, 0x19, 0, + AC_VERB_GET_PIN_SENSE, 0x0); + /* headset is present */ + if ((phone_present & AC_PINSENSE_PRESENCE) && + (mic_persent & AC_PINSENSE_PRESENCE)) { + cx_process_headset_plugin(codec); + spec->headset_present_flag = CX_HEADSET_ALLPRESENT; + } + } + } +} + +static void cx_jack_unsol_event(struct hda_codec *codec, unsigned int res) +{ + struct conexant_spec *spec = codec->spec; + + if (spec->is_cx8070_sn6140) + cx_update_headset_mic_vref(codec, res); + + snd_hda_jack_unsol_event(codec, res); +} + #ifdef CONFIG_PM static int cx_auto_suspend(struct hda_codec *codec) { @@ -205,7 +307,7 @@ static const struct hda_codec_ops cx_auto_patch_ops = { .build_pcms = snd_hda_gen_build_pcms, .init = cx_auto_init, .free = cx_auto_free, - .unsol_event = snd_hda_jack_unsol_event, + .unsol_event = cx_jack_unsol_event, #ifdef CONFIG_PM .suspend = cx_auto_suspend, .check_power_status = snd_hda_gen_check_power_status, @@ -242,6 +344,7 @@ enum { CXT_FIXUP_HP_ZBOOK_MUTE_LED, CXT_FIXUP_HEADSET_MIC, CXT_FIXUP_HP_MIC_NO_PRESENCE, + CXT_PINCFG_SWS_JS201D, }; /* for hda_fixup_thinkpad_acpi() */ @@ -739,6 +842,17 @@ static const struct hda_pintbl cxt_pincfg_lemote[] = { {} }; +/* SuoWoSi/South-holding JS201D with sn6140 */ +static const struct hda_pintbl cxt_pincfg_sws_js201d[] = { + { 0x16, 0x03211040 }, /* hp out */ + { 0x17, 0x91170110 }, /* SPK/Class_D */ + { 0x18, 0x95a70130 }, /* Internal mic */ + { 0x19, 0x03a11020 }, /* Headset Mic */ + { 0x1a, 0x40f001f0 }, /* Not used */ + { 0x21, 0x40f001f0 }, /* Not used */ + {} +}; + static const struct hda_fixup cxt_fixups[] = { [CXT_PINCFG_LENOVO_X200] = { .type = HDA_FIXUP_PINS, @@ -894,6 +1008,10 @@ static const struct hda_fixup cxt_fixups[] = { .chained = true, .chain_id = CXT_FIXUP_HEADSET_MIC, }, + [CXT_PINCFG_SWS_JS201D] = { + .type = HDA_FIXUP_PINS, + .v.pins = cxt_pincfg_sws_js201d, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -967,6 +1085,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8458, "HP Z2 G4 mini premium", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), + SND_PCI_QUIRK(0x14f1, 0x0265, "SWS JS201D", CXT_PINCFG_SWS_JS201D), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo T410", CXT_PINCFG_LENOVO_TP410), @@ -1007,6 +1126,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" }, { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" }, { .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" }, + { .id = CXT_PINCFG_SWS_JS201D, .name = "sws-js201d" }, {} }; @@ -1042,6 +1162,15 @@ static int patch_conexant_auto(struct hda_codec *codec) codec->spec = spec; codec->patch_ops = cx_auto_patch_ops; + /* init cx8070/sn6140 flag and reset headset_present_flag */ + switch (codec->core.vendor_id) { + case 0x14f11f86: + case 0x14f11f87: + spec->is_cx8070_sn6140 = true; + spec->headset_present_flag = CX_HEADSET_NOPRESENT; + break; + } + cx_auto_parse_eapd(codec); spec->gen.own_eapd_ctl = 1; diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c index 627899959ffe..e41316e2e983 100644 --- a/sound/pci/hda/patch_cs8409.c +++ b/sound/pci/hda/patch_cs8409.c @@ -1371,6 +1371,7 @@ void dolphin_fixups(struct hda_codec *codec, const struct hda_fixup *fix, int ac spec->scodecs[CS8409_CODEC1] = &dolphin_cs42l42_1; spec->scodecs[CS8409_CODEC1]->codec = codec; spec->num_scodecs = 2; + spec->gen.suppress_vmaster = 1; codec->patch_ops = cs8409_dolphin_patch_ops; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 62f213704492..fb12034d464e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3681,6 +3681,7 @@ static void alc285_hp_init(struct hda_codec *codec) int i, val; int coef38, coef0d, coef36; + alc_write_coefex_idx(codec, 0x58, 0x00, 0x1888); /* write default value */ alc_update_coef_idx(codec, 0x4a, 1<<15, 1<<15); /* Reset HP JD */ coef38 = alc_read_coef_idx(codec, 0x38); /* Amp control */ coef0d = alc_read_coef_idx(codec, 0x0d); /* Digital Misc control */ @@ -6692,6 +6693,60 @@ static void alc285_fixup_hp_spectre_x360(struct hda_codec *codec, } } +static void alc285_fixup_hp_envy_x360(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + static const struct coef_fw coefs[] = { + WRITE_COEF(0x08, 0x6a0c), WRITE_COEF(0x0d, 0xa023), + WRITE_COEF(0x10, 0x0320), WRITE_COEF(0x1a, 0x8c03), + WRITE_COEF(0x25, 0x1800), WRITE_COEF(0x26, 0x003a), + WRITE_COEF(0x28, 0x1dfe), WRITE_COEF(0x29, 0xb014), + WRITE_COEF(0x2b, 0x1dfe), WRITE_COEF(0x37, 0xfe15), + WRITE_COEF(0x38, 0x7909), WRITE_COEF(0x45, 0xd489), + WRITE_COEF(0x46, 0x00f4), WRITE_COEF(0x4a, 0x21e0), + WRITE_COEF(0x66, 0x03f0), WRITE_COEF(0x67, 0x1000), + WRITE_COEF(0x6e, 0x1005), { } + }; + + static const struct hda_pintbl pincfgs[] = { + { 0x12, 0xb7a60130 }, /* Internal microphone*/ + { 0x14, 0x90170150 }, /* B&O soundbar speakers */ + { 0x17, 0x90170153 }, /* Side speakers */ + { 0x19, 0x03a11040 }, /* Headset microphone */ + { } + }; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + snd_hda_apply_pincfgs(codec, pincfgs); + + /* Fixes volume control problem for side speakers */ + alc295_fixup_disable_dac3(codec, fix, action); + + /* Fixes no sound from headset speaker */ + snd_hda_codec_amp_stereo(codec, 0x21, HDA_OUTPUT, 0, -1, 0); + + /* Auto-enable headset mic when plugged */ + snd_hda_jack_set_gating_jack(codec, 0x19, 0x21); + + /* Headset mic volume enhancement */ + snd_hda_codec_set_pin_target(codec, 0x19, PIN_VREF50); + break; + case HDA_FIXUP_ACT_INIT: + alc_process_coef_fw(codec, coefs); + break; + case HDA_FIXUP_ACT_BUILD: + rename_ctl(codec, "Bass Speaker Playback Volume", + "B&O-Tuned Playback Volume"); + rename_ctl(codec, "Front Playback Switch", + "B&O Soundbar Playback Switch"); + rename_ctl(codec, "Bass Speaker Playback Switch", + "Side Speaker Playback Switch"); + break; + } +} + /* for hda_fixup_thinkpad_acpi() */ #include "thinkpad_helper.c" @@ -7130,6 +7185,7 @@ enum { ALC280_FIXUP_HP_9480M, ALC245_FIXUP_HP_X360_AMP, ALC285_FIXUP_HP_SPECTRE_X360_EB1, + ALC285_FIXUP_HP_ENVY_X360, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, ALC288_FIXUP_DELL_XPS_13, @@ -9053,6 +9109,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_spectre_x360_eb1 }, + [ALC285_FIXUP_HP_ENVY_X360] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_envy_x360, + .chained = true, + .chain_id = ALC285_FIXUP_HP_GPIO_AMP_INIT, + }, [ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_ideapad_s740_coef, @@ -9377,7 +9439,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cs35l41_fixup_i2c_two, .chained = true, - .chain_id = ALC269_FIXUP_THINKPAD_ACPI, + .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, }, [ALC245_FIXUP_HP_MUTE_LED_COEFBIT] = { .type = HDA_FIXUP_FUNC, @@ -9392,6 +9454,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC287_FIXUP_THINKPAD_I2S_SPK] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_bind_dacs, + .chained = true, + .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, }, [ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD] = { .type = HDA_FIXUP_FUNC, @@ -9431,6 +9495,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x126a, "Acer Swift SF114-32", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), @@ -9591,6 +9656,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), SND_PCI_QUIRK(0x103c, 0x8537, "HP ProBook 440 G6", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x85de, "HP Envy x360 13-ar0xxx", ALC285_FIXUP_HP_ENVY_X360), SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), @@ -9617,6 +9683,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8786, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), @@ -9658,6 +9725,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4), SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), @@ -9683,16 +9751,20 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ab9, "HP EliteBook 840 G8 (MB 8AB8)", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), + SND_PCI_QUIRK(0x103c, 0x8b3f, "HP mt440 Mobile Thin Client U91", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b45, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b46, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b59, "HP Elite mt645 G7 Mobile Thin Client U89", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b63, "HP Elite Dragonfly 13.5 inch G4", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), @@ -9720,8 +9792,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c8a, "HP EliteBook 630", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c8c, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c90, "HP EliteBook 640", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), @@ -10049,6 +10127,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), + SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), @@ -10236,6 +10315,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360_EB1, .name = "alc285-hp-spectre-x360-eb1"}, + {.id = ALC285_FIXUP_HP_ENVY_X360, .name = "alc285-hp-envy-x360"}, {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {.id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, .name = "alc287-yoga9-bass-spk-pin"}, {.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"}, @@ -10674,6 +10754,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { * at most one tbl is allowed to define for the same vendor and same codec */ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = { + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1025, "Acer", ALC2XX_FIXUP_HEADSET_MIC, + {0x19, 0x40000000}), SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, {0x19, 0x40000000}, {0x1b, 0x40000000}), @@ -11363,8 +11445,7 @@ static void alc897_hp_automute_hook(struct hda_codec *codec, snd_hda_gen_hp_automute(codec, jack); vref = spec->gen.hp_jack_present ? (PIN_HP | AC_PINCTL_VREF_100) : PIN_HP; - snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, - vref); + snd_hda_set_pin_ctl(codec, 0x1b, vref); } static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec, @@ -11373,6 +11454,10 @@ static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec, struct alc_spec *spec = codec->spec; if (action == HDA_FIXUP_ACT_PRE_PROBE) { spec->gen.hp_automute_hook = alc897_hp_automute_hook; + spec->no_shutup_pins = 1; + } + if (action == HDA_FIXUP_ACT_PROBE) { + snd_hda_set_pin_ctl_cache(codec, 0x1a, PIN_IN | AC_PINCTL_VREF_100); } } diff --git a/sound/sh/aica.c b/sound/sh/aica.c index 6e9d6bd67369..8b47bfcd9031 100644 --- a/sound/sh/aica.c +++ b/sound/sh/aica.c @@ -278,7 +278,8 @@ static void run_spu_dma(struct work_struct *work) dreamcastcard->clicks++; if (unlikely(dreamcastcard->clicks >= AICA_PERIOD_NUMBER)) dreamcastcard->clicks %= AICA_PERIOD_NUMBER; - mod_timer(&dreamcastcard->timer, jiffies + 1); + if (snd_pcm_running(dreamcastcard->substream)) + mod_timer(&dreamcastcard->timer, jiffies + 1); } } @@ -290,6 +291,8 @@ static void aica_period_elapsed(struct timer_list *t) /*timer function - so cannot sleep */ int play_period; struct snd_pcm_runtime *runtime; + if (!snd_pcm_running(substream)) + return; runtime = substream->runtime; dreamcastcard = substream->pcm->private_data; /* Have we played out an additional period? */ @@ -350,12 +353,19 @@ static int snd_aicapcm_pcm_open(struct snd_pcm_substream return 0; } +static int snd_aicapcm_pcm_sync_stop(struct snd_pcm_substream *substream) +{ + struct snd_card_aica *dreamcastcard = substream->pcm->private_data; + + del_timer_sync(&dreamcastcard->timer); + cancel_work_sync(&dreamcastcard->spu_dma_work); + return 0; +} + static int snd_aicapcm_pcm_close(struct snd_pcm_substream *substream) { struct snd_card_aica *dreamcastcard = substream->pcm->private_data; - flush_work(&(dreamcastcard->spu_dma_work)); - del_timer(&dreamcastcard->timer); dreamcastcard->substream = NULL; kfree(dreamcastcard->channel); spu_disable(); @@ -401,6 +411,7 @@ static const struct snd_pcm_ops snd_aicapcm_playback_ops = { .prepare = snd_aicapcm_pcm_prepare, .trigger = snd_aicapcm_pcm_trigger, .pointer = snd_aicapcm_pcm_pointer, + .sync_stop = snd_aicapcm_pcm_sync_stop, }; /* TO DO: set up to handle more than one pcm instance */ diff --git a/sound/soc/amd/acp/acp-sof-mach.c b/sound/soc/amd/acp/acp-sof-mach.c index f19f064a7527..972600d27158 100644 --- a/sound/soc/amd/acp/acp-sof-mach.c +++ b/sound/soc/amd/acp/acp-sof-mach.c @@ -114,16 +114,14 @@ static int acp_sof_probe(struct platform_device *pdev) card->num_controls = ARRAY_SIZE(acp_controls); card->drvdata = (struct acp_card_drvdata *)pdev->id_entry->driver_data; - acp_sofdsp_dai_links_create(card); + ret = acp_sofdsp_dai_links_create(card); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to create DAI links\n"); ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret) { - dev_err(&pdev->dev, - "devm_snd_soc_register_card(%s) failed: %d\n", - card->name, ret); - return ret; - } - + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Failed to register card(%s)\n", card->name); return 0; } diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 808d00282623..0568e64d1015 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -199,6 +199,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21HY"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J0"), + } + }, { .driver_data = &acp6x_card, .matches = { @@ -227,6 +234,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82UU"), + } + }, { .driver_data = &acp6x_card, .matches = { @@ -241,6 +255,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82YM"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83AS"), + } + }, { .driver_data = &acp6x_card, .matches = { @@ -297,6 +318,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Redmi Book Pro 15 2022"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 C7VF"), + } + }, { .driver_data = &acp6x_card, .matches = { diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index 2ccc68513f7c..bb9de5767ebc 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -1581,7 +1581,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w, u16 gain_reg; u16 reg; int val; - int offset_val = 0; struct wsa_macro *wsa = snd_soc_component_get_drvdata(component); if (w->shift == WSA_MACRO_COMP1) { @@ -1620,10 +1619,8 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w, CDC_WSA_RX1_RX_PATH_MIX_SEC0, CDC_WSA_RX_PGA_HALF_DB_MASK, CDC_WSA_RX_PGA_HALF_DB_ENABLE); - offset_val = -2; } val = snd_soc_component_read(component, gain_reg); - val += offset_val; snd_soc_component_write(component, gain_reg, val); wsa_macro_config_ear_spkr_gain(component, wsa, event, gain_reg); @@ -1651,10 +1648,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w, CDC_WSA_RX1_RX_PATH_MIX_SEC0, CDC_WSA_RX_PGA_HALF_DB_MASK, CDC_WSA_RX_PGA_HALF_DB_DISABLE); - offset_val = 2; - val = snd_soc_component_read(component, gain_reg); - val += offset_val; - snd_soc_component_write(component, gain_reg, val); } wsa_macro_config_ear_spkr_gain(component, wsa, event, gain_reg); diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 844d14d4c9a5..aac914074996 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3802,6 +3802,16 @@ static const struct dmi_system_id dmi_platform_data[] = { DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), DMI_EXACT_MATCH(DMI_BOARD_VERSION, "Default string"), + /* + * Above strings are too generic, LattePanda BIOS versions for + * all 4 hw revisions are: + * DF-BI-7-S70CR100-* + * DF-BI-7-S70CR110-* + * DF-BI-7-S70CR200-* + * LP-BS-7-S70CR700-* + * Do a partial match for S70CR to avoid false positive matches. + */ + DMI_MATCH(DMI_BIOS_VERSION, "S70CR"), }, .driver_data = (void *)&lattepanda_board_platform_data, }, diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index a2abd1a11161..555b74e7172d 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -210,7 +210,7 @@ struct wcd938x_priv { }; static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800); -static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000); +static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, 0); static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000); struct wcd938x_mbhc_zdet_param { @@ -3588,7 +3588,7 @@ static int wcd938x_probe(struct platform_device *pdev) ret = wcd938x_populate_dt_data(wcd938x, dev); if (ret) { dev_err(dev, "%s: Fail to obtain platform data\n", __func__); - return -EINVAL; + return ret; } ret = wcd938x_add_slave_components(wcd938x, dev, &match); diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index b901e4c65e8a..d215e58c4a7b 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2229,6 +2229,9 @@ SND_SOC_DAPM_PGA_E("HPOUT", SND_SOC_NOPM, 0, 0, NULL, 0, hp_event, SND_SOC_DAPM_OUTPUT("HPOUTL"), SND_SOC_DAPM_OUTPUT("HPOUTR"), + +SND_SOC_DAPM_PGA("SPKOUTL Output", WM8962_CLASS_D_CONTROL_1, 6, 0, NULL, 0), +SND_SOC_DAPM_PGA("SPKOUTR Output", WM8962_CLASS_D_CONTROL_1, 7, 0, NULL, 0), }; static const struct snd_soc_dapm_widget wm8962_dapm_spk_mono_widgets[] = { @@ -2236,7 +2239,6 @@ SND_SOC_DAPM_MIXER("Speaker Mixer", WM8962_MIXER_ENABLES, 1, 0, spkmixl, ARRAY_SIZE(spkmixl)), SND_SOC_DAPM_MUX_E("Speaker PGA", WM8962_PWR_MGMT_2, 4, 0, &spkoutl_mux, out_pga_event, SND_SOC_DAPM_POST_PMU), -SND_SOC_DAPM_PGA("Speaker Output", WM8962_CLASS_D_CONTROL_1, 7, 0, NULL, 0), SND_SOC_DAPM_OUTPUT("SPKOUT"), }; @@ -2251,9 +2253,6 @@ SND_SOC_DAPM_MUX_E("SPKOUTL PGA", WM8962_PWR_MGMT_2, 4, 0, &spkoutl_mux, SND_SOC_DAPM_MUX_E("SPKOUTR PGA", WM8962_PWR_MGMT_2, 3, 0, &spkoutr_mux, out_pga_event, SND_SOC_DAPM_POST_PMU), -SND_SOC_DAPM_PGA("SPKOUTR Output", WM8962_CLASS_D_CONTROL_1, 7, 0, NULL, 0), -SND_SOC_DAPM_PGA("SPKOUTL Output", WM8962_CLASS_D_CONTROL_1, 6, 0, NULL, 0), - SND_SOC_DAPM_OUTPUT("SPKOUTL"), SND_SOC_DAPM_OUTPUT("SPKOUTR"), }; @@ -2366,12 +2365,18 @@ static const struct snd_soc_dapm_route wm8962_spk_mono_intercon[] = { { "Speaker PGA", "Mixer", "Speaker Mixer" }, { "Speaker PGA", "DAC", "DACL" }, - { "Speaker Output", NULL, "Speaker PGA" }, - { "Speaker Output", NULL, "SYSCLK" }, - { "Speaker Output", NULL, "TOCLK" }, - { "Speaker Output", NULL, "TEMP_SPK" }, + { "SPKOUTL Output", NULL, "Speaker PGA" }, + { "SPKOUTL Output", NULL, "SYSCLK" }, + { "SPKOUTL Output", NULL, "TOCLK" }, + { "SPKOUTL Output", NULL, "TEMP_SPK" }, - { "SPKOUT", NULL, "Speaker Output" }, + { "SPKOUTR Output", NULL, "Speaker PGA" }, + { "SPKOUTR Output", NULL, "SYSCLK" }, + { "SPKOUTR Output", NULL, "TOCLK" }, + { "SPKOUTR Output", NULL, "TEMP_SPK" }, + + { "SPKOUT", NULL, "SPKOUTL Output" }, + { "SPKOUT", NULL, "SPKOUTR Output" }, }; static const struct snd_soc_dapm_route wm8962_spk_stereo_intercon[] = { @@ -2914,8 +2919,12 @@ static int wm8962_set_fll(struct snd_soc_component *component, int fll_id, int s switch (fll_id) { case WM8962_FLL_MCLK: case WM8962_FLL_BCLK: + fll1 |= (fll_id - 1) << WM8962_FLL_REFCLK_SRC_SHIFT; + break; case WM8962_FLL_OSC: fll1 |= (fll_id - 1) << WM8962_FLL_REFCLK_SRC_SHIFT; + snd_soc_component_update_bits(component, WM8962_PLL2, + WM8962_OSC_ENA, WM8962_OSC_ENA); break; case WM8962_FLL_INT: snd_soc_component_update_bits(component, WM8962_FLL_CONTROL_1, @@ -2924,7 +2933,7 @@ static int wm8962_set_fll(struct snd_soc_component *component, int fll_id, int s WM8962_FLL_FRC_NCO, WM8962_FLL_FRC_NCO); break; default: - dev_err(component->dev, "Unknown FLL source %d\n", ret); + dev_err(component->dev, "Unknown FLL source %d\n", source); return -EINVAL; } diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 2cfca78f0401..47a4c363227c 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -740,19 +740,25 @@ static int wm_adsp_request_firmware_file(struct wm_adsp *dsp, const char *filetype) { struct cs_dsp *cs_dsp = &dsp->cs_dsp; + const char *fwf; char *s, c; int ret = 0; + if (dsp->fwf_name) + fwf = dsp->fwf_name; + else + fwf = dsp->cs_dsp.name; + if (system_name && asoc_component_prefix) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix, filetype); else if (system_name) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, filetype); else - *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, dsp->fwf_name, + *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, fwf, wm_adsp_fw[dsp->fw].file, filetype); if (*filename == NULL) @@ -842,29 +848,18 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, } adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n", - cirrus_dir, dsp->part, dsp->fwf_name, wm_adsp_fw[dsp->fw].file, - system_name, asoc_component_prefix); + cirrus_dir, dsp->part, + dsp->fwf_name ? dsp->fwf_name : dsp->cs_dsp.name, + wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix); return -ENOENT; } static int wm_adsp_common_init(struct wm_adsp *dsp) { - char *p; - INIT_LIST_HEAD(&dsp->compr_list); INIT_LIST_HEAD(&dsp->buffer_list); - if (!dsp->fwf_name) { - p = devm_kstrdup(dsp->cs_dsp.dev, dsp->cs_dsp.name, GFP_KERNEL); - if (!p) - return -ENOMEM; - - dsp->fwf_name = p; - for (; *p != 0; ++p) - *p = tolower(*p); - } - return 0; } diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c index b152f4e5c4f2..cd96c35a150c 100644 --- a/sound/soc/codecs/wsa883x.c +++ b/sound/soc/codecs/wsa883x.c @@ -1102,7 +1102,11 @@ static int wsa_dev_mode_put(struct snd_kcontrol *kcontrol, return 1; } -static const DECLARE_TLV_DB_SCALE(pa_gain, -300, 150, -300); +static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(pa_gain, + 0, 14, TLV_DB_SCALE_ITEM(-300, 0, 0), + 15, 29, TLV_DB_SCALE_ITEM(-300, 150, 0), + 30, 31, TLV_DB_SCALE_ITEM(1800, 0, 0), +); static int wsa883x_get_swr_port(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 797d0a48d606..094445036c20 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -685,6 +685,18 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF1 | BYT_RT5640_MCLK_EN), }, + { /* Chuwi Vi8 dual-boot (CWI506) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "i86"), + /* The above are too generic, also match BIOS info */ + DMI_MATCH(DMI_BIOS_VERSION, "CHUWI2.D86JHBNR02"), + }, + .driver_data = (void *)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_MONO_SPEAKER | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { /* Chuwi Vi10 (CWI505) */ .matches = { diff --git a/sound/soc/meson/aiu.c b/sound/soc/meson/aiu.c index 88e611e64d14..077b9c0b6c4c 100644 --- a/sound/soc/meson/aiu.c +++ b/sound/soc/meson/aiu.c @@ -218,11 +218,12 @@ static const char * const aiu_spdif_ids[] = { static int aiu_clk_get(struct device *dev) { struct aiu *aiu = dev_get_drvdata(dev); + struct clk *pclk; int ret; - aiu->pclk = devm_clk_get(dev, "pclk"); - if (IS_ERR(aiu->pclk)) - return dev_err_probe(dev, PTR_ERR(aiu->pclk), "Can't get the aiu pclk\n"); + pclk = devm_clk_get_enabled(dev, "pclk"); + if (IS_ERR(pclk)) + return dev_err_probe(dev, PTR_ERR(pclk), "Can't get the aiu pclk\n"); aiu->spdif_mclk = devm_clk_get(dev, "spdif_mclk"); if (IS_ERR(aiu->spdif_mclk)) @@ -239,18 +240,6 @@ static int aiu_clk_get(struct device *dev) if (ret) return dev_err_probe(dev, ret, "Can't get the spdif clocks\n"); - ret = clk_prepare_enable(aiu->pclk); - if (ret) { - dev_err(dev, "peripheral clock enable failed\n"); - return ret; - } - - ret = devm_add_action_or_reset(dev, - (void(*)(void *))clk_disable_unprepare, - aiu->pclk); - if (ret) - dev_err(dev, "failed to add reset action on pclk"); - return ret; } diff --git a/sound/soc/meson/aiu.h b/sound/soc/meson/aiu.h index 393b6c2307e4..0f94c8bf6081 100644 --- a/sound/soc/meson/aiu.h +++ b/sound/soc/meson/aiu.h @@ -33,7 +33,6 @@ struct aiu_platform_data { }; struct aiu { - struct clk *pclk; struct clk *spdif_mclk; struct aiu_interface i2s; struct aiu_interface spdif; diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index c040c83637e0..028383f949ef 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -12,6 +12,9 @@ #include "axg-tdm.h" +/* Maximum bit clock frequency according the datasheets */ +#define MAX_SCLK 100000000 /* Hz */ + enum { TDM_IFACE_PAD, TDM_IFACE_LOOPBACK, @@ -155,19 +158,27 @@ static int axg_tdm_iface_startup(struct snd_pcm_substream *substream, return -EINVAL; } - /* Apply component wide rate symmetry */ if (snd_soc_component_active(dai->component)) { + /* Apply component wide rate symmetry */ ret = snd_pcm_hw_constraint_single(substream->runtime, SNDRV_PCM_HW_PARAM_RATE, iface->rate); - if (ret < 0) { - dev_err(dai->dev, - "can't set iface rate constraint\n"); - return ret; - } + + } else { + /* Limit rate according to the slot number and width */ + unsigned int max_rate = + MAX_SCLK / (iface->slots * iface->slot_width); + ret = snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_RATE, + 0, max_rate); } - return 0; + if (ret < 0) + dev_err(dai->dev, "can't set iface rate constraint\n"); + else + ret = 0; + + return ret; } static int axg_tdm_iface_set_stream(struct snd_pcm_substream *substream, @@ -266,8 +277,8 @@ static int axg_tdm_iface_set_sclk(struct snd_soc_dai *dai, srate = iface->slots * iface->slot_width * params_rate(params); if (!iface->mclk_rate) { - /* If no specific mclk is requested, default to bit clock * 4 */ - clk_set_rate(iface->mclk, 4 * srate); + /* If no specific mclk is requested, default to bit clock * 2 */ + clk_set_rate(iface->mclk, 2 * srate); } else { /* Check if we can actually get the bit clock from mclk */ if (iface->mclk_rate % srate) { diff --git a/sound/soc/meson/t9015.c b/sound/soc/meson/t9015.c index 9c6b4dac6893..571f65788c59 100644 --- a/sound/soc/meson/t9015.c +++ b/sound/soc/meson/t9015.c @@ -48,7 +48,6 @@ #define POWER_CFG 0x10 struct t9015 { - struct clk *pclk; struct regulator *avdd; }; @@ -249,6 +248,7 @@ static int t9015_probe(struct platform_device *pdev) struct t9015 *priv; void __iomem *regs; struct regmap *regmap; + struct clk *pclk; int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -256,26 +256,14 @@ static int t9015_probe(struct platform_device *pdev) return -ENOMEM; platform_set_drvdata(pdev, priv); - priv->pclk = devm_clk_get(dev, "pclk"); - if (IS_ERR(priv->pclk)) - return dev_err_probe(dev, PTR_ERR(priv->pclk), "failed to get core clock\n"); + pclk = devm_clk_get_enabled(dev, "pclk"); + if (IS_ERR(pclk)) + return dev_err_probe(dev, PTR_ERR(pclk), "failed to get core clock\n"); priv->avdd = devm_regulator_get(dev, "AVDD"); if (IS_ERR(priv->avdd)) return dev_err_probe(dev, PTR_ERR(priv->avdd), "failed to AVDD\n"); - ret = clk_prepare_enable(priv->pclk); - if (ret) { - dev_err(dev, "core clock enable failed\n"); - return ret; - } - - ret = devm_add_action_or_reset(dev, - (void(*)(void *))clk_disable_unprepare, - priv->pclk); - if (ret) - return ret; - ret = device_reset(dev); if (ret) { dev_err(dev, "reset failed\n"); diff --git a/sound/soc/sof/ipc3-loader.c b/sound/soc/sof/ipc3-loader.c index bf423ca4e97b..6e3ef0672110 100644 --- a/sound/soc/sof/ipc3-loader.c +++ b/sound/soc/sof/ipc3-loader.c @@ -138,8 +138,7 @@ static ssize_t ipc3_fw_ext_man_size(struct snd_sof_dev *sdev, const struct firmw static size_t sof_ipc3_fw_parse_ext_man(struct snd_sof_dev *sdev) { - struct snd_sof_pdata *plat_data = sdev->pdata; - const struct firmware *fw = plat_data->fw; + const struct firmware *fw = sdev->basefw.fw; const struct sof_ext_man_elem_header *elem_hdr; const struct sof_ext_man_header *head; ssize_t ext_man_size; @@ -149,6 +148,8 @@ static size_t sof_ipc3_fw_parse_ext_man(struct snd_sof_dev *sdev) head = (struct sof_ext_man_header *)fw->data; remaining = head->full_size - head->header_size; + if (remaining < 0 || remaining > sdev->basefw.fw->size) + return -EINVAL; ext_man_size = ipc3_fw_ext_man_size(sdev, fw); /* Assert firmware starts with extended manifest */ @@ -310,18 +311,18 @@ static int sof_ipc3_parse_module_memcpy(struct snd_sof_dev *sdev, static int sof_ipc3_load_fw_to_dsp(struct snd_sof_dev *sdev) { - struct snd_sof_pdata *plat_data = sdev->pdata; - const struct firmware *fw = plat_data->fw; + u32 payload_offset = sdev->basefw.payload_offset; + const struct firmware *fw = sdev->basefw.fw; struct snd_sof_fw_header *header; struct snd_sof_mod_hdr *module; int (*load_module)(struct snd_sof_dev *sof_dev, struct snd_sof_mod_hdr *hdr); size_t remaining; int ret, count; - if (!plat_data->fw) + if (!fw) return -EINVAL; - header = (struct snd_sof_fw_header *)(fw->data + plat_data->fw_offset); + header = (struct snd_sof_fw_header *)(fw->data + payload_offset); load_module = sof_ops(sdev)->load_module; if (!load_module) { dev_dbg(sdev->dev, "Using generic module loading\n"); @@ -331,9 +332,8 @@ static int sof_ipc3_load_fw_to_dsp(struct snd_sof_dev *sdev) } /* parse each module */ - module = (struct snd_sof_mod_hdr *)(fw->data + plat_data->fw_offset + - sizeof(*header)); - remaining = fw->size - sizeof(*header) - plat_data->fw_offset; + module = (struct snd_sof_mod_hdr *)(fw->data + payload_offset + sizeof(*header)); + remaining = fw->size - sizeof(*header) - payload_offset; /* check for wrap */ if (remaining > fw->size) { dev_err(sdev->dev, "%s: fw size smaller than header size\n", __func__); @@ -374,19 +374,19 @@ static int sof_ipc3_load_fw_to_dsp(struct snd_sof_dev *sdev) static int sof_ipc3_validate_firmware(struct snd_sof_dev *sdev) { - struct snd_sof_pdata *plat_data = sdev->pdata; - const struct firmware *fw = plat_data->fw; + u32 payload_offset = sdev->basefw.payload_offset; + const struct firmware *fw = sdev->basefw.fw; struct snd_sof_fw_header *header; - size_t fw_size = fw->size - plat_data->fw_offset; + size_t fw_size = fw->size - payload_offset; - if (fw->size <= plat_data->fw_offset) { + if (fw->size <= payload_offset) { dev_err(sdev->dev, "firmware size must be greater than firmware offset\n"); return -EINVAL; } /* Read the header information from the data pointer */ - header = (struct snd_sof_fw_header *)(fw->data + plat_data->fw_offset); + header = (struct snd_sof_fw_header *)(fw->data + payload_offset); /* verify FW sig */ if (strncmp(header->sig, SND_SOF_FW_SIG, SND_SOF_FW_SIG_SIZE) != 0) { diff --git a/sound/soc/sof/ipc4-loader.c b/sound/soc/sof/ipc4-loader.c index e635ae515fa9..9f433e9b4cd3 100644 --- a/sound/soc/sof/ipc4-loader.c +++ b/sound/soc/sof/ipc4-loader.c @@ -17,9 +17,8 @@ static size_t sof_ipc4_fw_parse_ext_man(struct snd_sof_dev *sdev) { struct sof_ipc4_fw_data *ipc4_data = sdev->private; - struct snd_sof_pdata *plat_data = sdev->pdata; struct sof_man4_fw_binary_header *fw_header; - const struct firmware *fw = plat_data->fw; + const struct firmware *fw = sdev->basefw.fw; struct sof_ext_manifest4_hdr *ext_man_hdr; struct sof_man4_module_config *fm_config; struct sof_ipc4_fw_module *fw_module; @@ -138,9 +137,8 @@ static int sof_ipc4_validate_firmware(struct snd_sof_dev *sdev) { struct sof_ipc4_fw_data *ipc4_data = sdev->private; u32 fw_hdr_offset = ipc4_data->manifest_fw_hdr_offset; - struct snd_sof_pdata *plat_data = sdev->pdata; struct sof_man4_fw_binary_header *fw_header; - const struct firmware *fw = plat_data->fw; + const struct firmware *fw = sdev->basefw.fw; struct sof_ext_manifest4_hdr *ext_man_hdr; ext_man_hdr = (struct sof_ext_manifest4_hdr *)fw->data; diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c index 5f51d936b306..ba8e3aae0a5c 100644 --- a/sound/soc/sof/loader.c +++ b/sound/soc/sof/loader.c @@ -22,7 +22,7 @@ int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev) int ret; /* Don't request firmware again if firmware is already requested */ - if (plat_data->fw) + if (sdev->basefw.fw) return 0; fw_filename = kasprintf(GFP_KERNEL, "%s/%s", @@ -31,7 +31,7 @@ int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev) if (!fw_filename) return -ENOMEM; - ret = request_firmware(&plat_data->fw, fw_filename, sdev->dev); + ret = request_firmware(&sdev->basefw.fw, fw_filename, sdev->dev); if (ret < 0) { dev_err(sdev->dev, @@ -48,7 +48,7 @@ int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev) ext_man_size = sdev->ipc->ops->fw_loader->parse_ext_manifest(sdev); if (ext_man_size > 0) { /* when no error occurred, drop extended manifest */ - plat_data->fw_offset = ext_man_size; + sdev->basefw.payload_offset = ext_man_size; } else if (!ext_man_size) { /* No extended manifest, so nothing to skip during FW load */ dev_dbg(sdev->dev, "firmware doesn't contain extended manifest\n"); @@ -58,6 +58,12 @@ int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev) fw_filename, ret); } + /* + * Until the platform code is switched to use the new container the fw + * and payload offset must be set in plat_data + */ + plat_data->fw = sdev->basefw.fw; + plat_data->fw_offset = sdev->basefw.payload_offset; err: kfree(fw_filename); @@ -100,7 +106,8 @@ int snd_sof_load_firmware_memcpy(struct snd_sof_dev *sdev) return 0; error: - release_firmware(plat_data->fw); + release_firmware(sdev->basefw.fw); + sdev->basefw.fw = NULL; plat_data->fw = NULL; return ret; @@ -185,7 +192,8 @@ EXPORT_SYMBOL(snd_sof_run_firmware); void snd_sof_fw_unload(struct snd_sof_dev *sdev) { /* TODO: support module unloading at runtime */ - release_firmware(sdev->pdata->fw); + release_firmware(sdev->basefw.fw); + sdev->basefw.fw = NULL; sdev->pdata->fw = NULL; } EXPORT_SYMBOL(snd_sof_fw_unload); diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h index de08825915b3..3d70b57e4864 100644 --- a/sound/soc/sof/sof-priv.h +++ b/sound/soc/sof/sof-priv.h @@ -136,6 +136,17 @@ struct snd_sof_platform_stream_params { bool cont_update_posn; }; +/** + * struct sof_firmware - Container struct for SOF firmware + * @fw: Pointer to the firmware + * @payload_offset: Offset of the data within the loaded firmware image to be + * loaded to the DSP (skipping for example ext_manifest section) + */ +struct sof_firmware { + const struct firmware *fw; + u32 payload_offset; +}; + /* * SOF DSP HW abstraction operations. * Used to abstract DSP HW architecture and any IO busses between host CPU @@ -487,6 +498,9 @@ struct snd_sof_dev { spinlock_t ipc_lock; /* lock for IPC users */ spinlock_t hw_lock; /* lock for HW IO access */ + /* Main, Base firmware image */ + struct sof_firmware basefw; + /* * ASoC components. plat_drv fields are set dynamically so * can't use const diff --git a/sound/soc/sunxi/sun4i-spdif.c b/sound/soc/sunxi/sun4i-spdif.c index bcceebca915a..484b0e7c2def 100644 --- a/sound/soc/sunxi/sun4i-spdif.c +++ b/sound/soc/sunxi/sun4i-spdif.c @@ -578,6 +578,11 @@ static const struct of_device_id sun4i_spdif_of_match[] = { .compatible = "allwinner,sun50i-h6-spdif", .data = &sun50i_h6_spdif_quirks, }, + { + .compatible = "allwinner,sun50i-h616-spdif", + /* Essentially the same as the H6, but without RX */ + .data = &sun50i_h6_spdif_quirks, + }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sun4i_spdif_of_match); diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 33db334e6556..a676ad093d18 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -328,8 +328,16 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); - if (err < 0) + if (err < 0) { + if (pins == 1) { + usb_audio_dbg(chip, + "%s(): selector returned an error, " + "assuming a firmware bug, id %d, ret %d\n", + __func__, clock_id, err); + return ret; + } return err; + } } if (!validate || ret > 0 || !chip->autoclock) diff --git a/sound/usb/format.c b/sound/usb/format.c index ab5fed9f55b6..3b45d0ee7693 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -470,9 +470,11 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, int clock) { struct usb_device *dev = chip->dev; + struct usb_host_interface *alts; unsigned int *table; unsigned int nr_rates; int i, err; + u32 bmControls; /* performing the rate verification may lead to unexpected USB bus * behavior afterwards by some unknown reason. Do this only for the @@ -481,6 +483,24 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, if (!(chip->quirk_flags & QUIRK_FLAG_VALIDATE_RATES)) return 0; /* don't perform the validation as default */ + alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); + if (!alts) + return 0; + + if (fp->protocol == UAC_VERSION_3) { + struct uac3_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = le32_to_cpu(as->bmControls); + } else { + struct uac2_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = as->bmControls; + } + + if (!uac_v2v3_control_is_readable(bmControls, + UAC2_AS_VAL_ALT_SETTINGS)) + return 0; + table = kcalloc(fp->nr_rates, sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 33380cad3a73..b8a474a2e4d5 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2029,10 +2029,14 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x0499, 0x1509, /* Steinberg UR22 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x0499, 0x3108, /* Yamaha YIT-W12TX */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04d8, 0xfeea, /* Benchmark DAC1 Pre */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04e8, 0xa051, /* Samsung USBC Headset (AKG) */ QUIRK_FLAG_SKIP_CLOCK_SELECTOR | QUIRK_FLAG_CTL_MSG_DELAY_5M), + DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */ + QUIRK_FLAG_IFACE_SKIP_CLOSE), DEVICE_FLG(0x054c, 0x0b8c, /* Sony WALKMAN NW-A45 DAC */ QUIRK_FLAG_SET_IFACE_FIRST), DEVICE_FLG(0x0556, 0x0014, /* Phoenix Audio TMX320VC */ @@ -2071,14 +2075,22 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x0763, 0x2031, /* M-Audio Fast Track C600 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x07fd, 0x000b, /* MOTU M Series 2nd hardware revision */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x0951, 0x16ad, /* Kingston HyperX */ QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0b0e, 0x0349, /* Jabra 550a */ QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ + QUIRK_FLAG_FIXED_RATE), + DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ + QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0fd9, 0x0008, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1397, 0x0507, /* Behringer UMC202HD */ @@ -2111,6 +2123,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY), DEVICE_FLG(0x1901, 0x0191, /* GE B850V3 CP2114 audio interface */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ + QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x2040, 0x7200, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x2040, 0x7201, /* Hauppauge HVR-950Q-MXL */ @@ -2153,6 +2169,12 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ @@ -2161,22 +2183,6 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x534d, 0x2109, /* MacroSilicon MS2109 */ QUIRK_FLAG_ALIGN_TRANSFER), - DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ - QUIRK_FLAG_GET_SAMPLE_RATE), - DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */ - QUIRK_FLAG_IFACE_SKIP_CLOSE), - DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ - QUIRK_FLAG_FIXED_RATE), - DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ - QUIRK_FLAG_FIXED_RATE), - DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ - QUIRK_FLAG_GET_SAMPLE_RATE), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 3d4add94e367..d5409f387945 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -300,9 +300,12 @@ static struct snd_pcm_chmap_elem *convert_chmap(int channels, unsigned int bits, c = 0; if (bits) { - for (; bits && *maps; maps++, bits >>= 1) + for (; bits && *maps; maps++, bits >>= 1) { if (bits & 1) chmap->map[c++] = *maps; + if (c == chmap->channels) + break; + } } else { /* If we're missing wChannelConfig, then guess something to make sure the channel map is not skipped entirely */ diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h index 11ff975242ca..e2ff22b379a4 100644 --- a/tools/arch/x86/include/asm/rmwcc.h +++ b/tools/arch/x86/include/asm/rmwcc.h @@ -4,7 +4,7 @@ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ + asm goto (fullop "; j" cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 41c02b6f6f04..7e0b846e17ee 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -2200,7 +2200,7 @@ static int profile_open_perf_events(struct profiler_bpf *obj) int map_fd; profile_perf_events = calloc( - sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); + obj->rodata->num_cpu * obj->rodata->num_metric, sizeof(int)); if (!profile_perf_events) { p_err("failed to allocate memory for perf_event array: %s", strerror(errno)); diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 77058174082d..ef0764d6891e 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include @@ -78,7 +79,7 @@ #include #define BTF_IDS_SECTION ".BTF_ids" -#define BTF_ID "__BTF_ID__" +#define BTF_ID_PREFIX "__BTF_ID__" #define BTF_STRUCT "struct" #define BTF_UNION "union" @@ -89,6 +90,14 @@ #define ADDR_CNT 100 +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define ELFDATANATIVE ELFDATA2LSB +#elif __BYTE_ORDER == __BIG_ENDIAN +# define ELFDATANATIVE ELFDATA2MSB +#else +# error "Unknown machine endianness!" +#endif + struct btf_id { struct rb_node rb_node; char *name; @@ -116,6 +125,7 @@ struct object { int idlist_shndx; size_t strtabidx; unsigned long idlist_addr; + int encoding; } efile; struct rb_root sets; @@ -161,7 +171,7 @@ static int eprintf(int level, int var, const char *fmt, ...) static bool is_btf_id(const char *name) { - return name && !strncmp(name, BTF_ID, sizeof(BTF_ID) - 1); + return name && !strncmp(name, BTF_ID_PREFIX, sizeof(BTF_ID_PREFIX) - 1); } static struct btf_id *btf_id__find(struct rb_root *root, const char *name) @@ -319,6 +329,7 @@ static int elf_collect(struct object *obj) { Elf_Scn *scn = NULL; size_t shdrstrndx; + GElf_Ehdr ehdr; int idx = 0; Elf *elf; int fd; @@ -350,6 +361,13 @@ static int elf_collect(struct object *obj) return -1; } + if (gelf_getehdr(obj->efile.elf, &ehdr) == NULL) { + pr_err("FAILED cannot get ELF header: %s\n", + elf_errmsg(-1)); + return -1; + } + obj->efile.encoding = ehdr.e_ident[EI_DATA]; + /* * Scan all the elf sections and look for save data * from .BTF_ids section and symbols. @@ -441,7 +459,7 @@ static int symbols_collect(struct object *obj) * __BTF_ID__TYPE__vfs_truncate__0 * prefix = ^ */ - prefix = name + sizeof(BTF_ID) - 1; + prefix = name + sizeof(BTF_ID_PREFIX) - 1; /* struct */ if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) { @@ -649,19 +667,18 @@ static int cmp_id(const void *pa, const void *pb) static int sets_patch(struct object *obj) { Elf_Data *data = obj->efile.idlist; - int *ptr = data->d_buf; struct rb_node *next; next = rb_first(&obj->sets); while (next) { - unsigned long addr, idx; + struct btf_id_set8 *set8; + struct btf_id_set *set; + unsigned long addr, off; struct btf_id *id; - int *base; - int cnt; id = rb_entry(next, struct btf_id, rb_node); addr = id->addr[0]; - idx = addr - obj->efile.idlist_addr; + off = addr - obj->efile.idlist_addr; /* sets are unique */ if (id->addr_cnt != 1) { @@ -670,14 +687,39 @@ static int sets_patch(struct object *obj) return -1; } - idx = idx / sizeof(int); - base = &ptr[idx] + (id->is_set8 ? 2 : 1); - cnt = ptr[idx]; + if (id->is_set) { + set = data->d_buf + off; + qsort(set->ids, set->cnt, sizeof(set->ids[0]), cmp_id); + } else { + set8 = data->d_buf + off; + /* + * Make sure id is at the beginning of the pairs + * struct, otherwise the below qsort would not work. + */ + BUILD_BUG_ON(set8->pairs != &set8->pairs[0].id); + qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id); + + /* + * When ELF endianness does not match endianness of the + * host, libelf will do the translation when updating + * the ELF. This, however, corrupts SET8 flags which are + * already in the target endianness. So, let's bswap + * them to the host endianness and libelf will then + * correctly translate everything. + */ + if (obj->efile.encoding != ELFDATANATIVE) { + int i; + + set8->flags = bswap_32(set8->flags); + for (i = 0; i < set8->cnt; i++) { + set8->pairs[i].flags = + bswap_32(set8->pairs[i].flags); + } + } + } pr_debug("sorting addr %5lu: cnt %6d [%s]\n", - (idx + 1) * sizeof(int), cnt, id->name); - - qsort(base, cnt, id->is_set8 ? sizeof(uint64_t) : sizeof(int), cmp_id); + off, id->is_set ? set->cnt : set8->cnt, id->name); next = rb_next(next); } diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c index eb6303ff446e..4cfcef9da3e4 100644 --- a/tools/build/feature/test-libopencsd.c +++ b/tools/build/feature/test-libopencsd.c @@ -4,9 +4,9 @@ /* * Check OpenCSD library version is sufficient to provide required features */ -#define OCSD_MIN_VER ((1 << 16) | (1 << 8) | (1)) +#define OCSD_MIN_VER ((1 << 16) | (2 << 8) | (1)) #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) -#error "OpenCSD >= 1.1.1 is required" +#error "OpenCSD >= 1.2.1 is required" #endif int main(void) diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 2f882d5cb30f..72ea363d434d 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -3,11 +3,22 @@ #ifndef _LINUX_BTF_IDS_H #define _LINUX_BTF_IDS_H +#include /* for u32 */ + struct btf_id_set { u32 cnt; u32 ids[]; }; +struct btf_id_set8 { + u32 cnt; + u32 flags; + struct { + u32 id; + u32 flags; + } pairs[]; +}; + #ifdef CONFIG_DEBUG_INFO_BTF #include /* for __PASTE */ diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h index 1bdd834bdd57..d09f9dc172a4 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -36,8 +36,8 @@ #include #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +#ifndef asm_goto_output +#define asm_goto_output(x...) asm goto(x) #endif #endif /* __LINUX_COMPILER_TYPES_H */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 201dc77ebbd7..d5d2183730b9 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3109,6 +3109,10 @@ union bpf_attr { * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. + * **BPF_FIB_LOOKUP_TBID** + * Used with BPF_FIB_LOOKUP_DIRECT. + * Use the routing table ID present in *params*->tbid + * for the fib lookup. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). @@ -3117,6 +3121,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6687,6 +6696,8 @@ enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6699,6 +6710,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -6733,6 +6745,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ @@ -6747,9 +6762,19 @@ struct bpf_fib_lookup { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; + union { + struct { + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + /* input: when accompanied with the + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a + * specific routing table to use for the fib lookup. + */ + __u32 tbid; + }; + __u8 smac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */ }; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index fddc05c667b5..874fe362375d 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -35,7 +35,7 @@ extern "C" { #endif -int libbpf_set_memlock_rlim(size_t memlock_bytes); +LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes); struct bpf_map_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 10f15a3e3a95..c71d4d0f5c6f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -70,6 +70,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog); +static int map_set_def_max_entries(struct bpf_map *map); static const char * const attach_type_name[] = { [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress", @@ -4157,6 +4158,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); + if (!scn_data) + return -LIBBPF_ERRNO__FORMAT; relo_sec_name = elf_sec_str(obj, shdr->sh_name); sec_name = elf_sec_name(obj, scn); @@ -4990,6 +4993,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b if (bpf_map_type__is_map_in_map(def->type)) { if (map->inner_map) { + err = map_set_def_max_entries(map->inner_map); + if (err) + return err; err = bpf_object__create_map(obj, map->inner_map, true); if (err) { pr_warn("map '%s': failed to create inner map: %d\n", diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 377642ff51fc..8669f6e0f6e2 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -17,6 +17,20 @@ #include #include "relo_core.h" +/* Android's libc doesn't support AT_EACCESS in faccessat() implementation + * ([0]), and just returns -EINVAL even if file exists and is accessible. + * See [1] for issues caused by this. + * + * So just redefine it to 0 on Android. + * + * [0] https://android.googlesource.com/platform/bionic/+/refs/heads/android13-release/libc/bionic/faccessat.cpp#50 + * [1] https://github.com/libbpf/libbpf-bootstrap/issues/250#issuecomment-1911324250 + */ +#ifdef __ANDROID__ +#undef AT_EACCESS +#define AT_EACCESS 0 +#endif + /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index bf02d62a3b2b..42f57b640f11 100644 --- a/tools/lib/subcmd/help.c +++ b/tools/lib/subcmd/help.c @@ -50,11 +50,21 @@ void uniq(struct cmdnames *cmds) if (!cmds->cnt) return; - for (i = j = 1; i < cmds->cnt; i++) - if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) - cmds->names[j++] = cmds->names[i]; - + for (i = 1; i < cmds->cnt; i++) { + if (!strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) + zfree(&cmds->names[i - 1]); + } + for (i = 0, j = 0; i < cmds->cnt; i++) { + if (cmds->names[i]) { + if (i == j) + j++; + else + cmds->names[j++] = cmds->names[i]; + } + } cmds->cnt = j; + while (j < i) + cmds->names[j++] = NULL; } void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7314183cdcb6..b9b0fda8374e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1785,8 +1785,8 @@ static int record__switch_output(struct record *rec, bool at_exit) { struct perf_data *data = &rec->data; + char *new_filename = NULL; int fd, err; - char *new_filename; /* Same Size: "2015122520103046"*/ char timestamp[] = "InvalidTimestamp"; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 76605fde3507..7db35dbdfcef 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2375,7 +2375,6 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, data->period = evsel->core.attr.sample_period; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; data->misc = event->header.misc; - data->id = -1ULL; data->data_src = PERF_MEM_DATA_SRC_NONE; data->vcpu = -1; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bc866d18973e..ef9a3df45965 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -366,7 +366,7 @@ static void print_metric_only(struct perf_stat_config *config, if (color) mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1; - color_snprintf(str, sizeof(str), color ?: "", fmt, val); + color_snprintf(str, sizeof(str), color ?: "", fmt ?: "", val); fprintf(out, "%*s ", mlen, str); } diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index c9bfe4696943..cee7fc3b5bb0 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -279,13 +279,13 @@ struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str) threads->nr = ntasks; } out: + strlist__delete(slist); if (threads) refcount_set(&threads->refcnt, 1); return threads; out_free_threads: zfree(&threads); - strlist__delete(slist); goto out; } diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index e914cc45b766..6f00bee917a0 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -467,10 +467,20 @@ int setup_classid_environment(void) return 1; } - if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && - errno != EBUSY) { - log_err("mount cgroup net_cls"); - return 1; + if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) { + if (errno != EBUSY) { + log_err("mount cgroup net_cls"); + return 1; + } + + if (rmdir(NETCLS_MOUNT_PATH)) { + log_err("rmdir cgroup net_cls"); + return 1; + } + if (umount(CGROUP_MOUNT_DFLT)) { + log_err("umount cgroup base"); + return 1; + } } cleanup_classid_environment(); diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index d711f4bea98e..47cb753ef1e3 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -5211,6 +5211,7 @@ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind) #endif assert(0); + return 0; } static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c index c39f559d3100..42c4a8b62e36 100644 --- a/tools/testing/selftests/bpf/progs/pyperf180.c +++ b/tools/testing/selftests/bpf/progs/pyperf180.c @@ -1,4 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #define STACK_MAX_LEN 180 + +/* llvm upstream commit at clang18 + * https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e + * changed inlining behavior and caused compilation failure as some branch + * target distance exceeded 16bit representation which is the maximum for + * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18 + * to specify which cpu version is used for compilation. So a smaller + * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which + * reduced some branch target distances and resolved the compilation failure. + * + * To capture the case where a developer/ci uses clang18 but the corresponding + * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count + * will be set as well to prevent potential compilation failures. + */ +#ifdef __BPF_CPU_VERSION__ +#if __BPF_CPU_VERSION__ < 4 +#define UNROLL_COUNT 90 +#endif +#elif __clang_major__ == 18 +#define UNROLL_COUNT 90 +#endif + #include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c index f416032ba858..b295f9b721bf 100644 --- a/tools/testing/selftests/bpf/progs/test_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c @@ -21,6 +21,32 @@ struct { __type(value, __u32); } mim_hash SEC(".maps"); +/* The following three maps are used to test + * perf_event_array map can be an inner + * map of hash/array_of_maps. + */ +struct perf_event_array { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, __u32); + __type(value, __u32); +} inner_map0 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, __u32); + __array(values, struct perf_event_array); +} mim_array_pe SEC(".maps") = { + .values = {&inner_map0}}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 1); + __type(key, __u32); + __array(values, struct perf_event_array); +} mim_hash_pe SEC(".maps") = { + .values = {&inner_map0}}; + SEC("xdp") int xdp_mimtest0(struct xdp_md *ctx) { diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index b73152822aa2..81cd48cc80c2 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1190,7 +1190,11 @@ static void test_map_in_map(void) goto out_map_in_map; } - bpf_object__load(obj); + err = bpf_object__load(obj); + if (err) { + printf("Failed to load test prog\n"); + goto out_map_in_map; + } map = bpf_object__find_map_by_name(obj, "mim_array"); if (!map) { diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh index 16c7fb858ac1..696ef9bf3afc 100644 --- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh +++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh @@ -46,6 +46,17 @@ test_LAG_cleanup() ip link add mv0 link "$name" up address "$ucaddr" type macvlan # Used to test dev->mc handling ip address add "$addr6" dev "$name" + + # Check that addresses were added as expected + (grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "macvlan unicast address not found on a slave" + + # mcaddr is added asynchronously by addrconf_dad_work(), use busywait + (busywait 10000 grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "IPv6 solicited-node multicast mac address not found on a slave" + ip link set dev "$name" down ip link del "$name" diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 7bf56ea161e3..616d3581419c 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -11,7 +11,7 @@ ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ delta_two_masks_one_key_test delta_simple_rehash_test \ bloom_simple_test bloom_complex_test bloom_delta_test \ - max_erp_entries_test" + max_erp_entries_test max_group_size_test" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/tc_common.sh @@ -1033,6 +1033,60 @@ max_erp_entries_test() "max chain $chain_failed, mask $mask_failed" } +max_group_size_test() +{ + # The number of ACLs in an ACL group is limited. Once the maximum + # number of ACLs has been reached, filters cannot be added. This test + # verifies that when this limit is reached, insertion fails without + # crashing. + + RET=0 + + local num_acls=32 + local max_size + local ret + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + for ((i=1; i < $num_acls; i++)); do + if [[ $(( i % 2 )) == 1 ]]; then + tc filter add dev $h2 ingress pref $i proto ipv4 \ + flower $tcflags dst_ip 198.51.100.1/32 \ + ip_proto tcp tcp_flags 0x01/0x01 \ + action drop &> /dev/null + else + tc filter add dev $h2 ingress pref $i proto ipv6 \ + flower $tcflags dst_ip 2001:db8:1::1/128 \ + action drop &> /dev/null + fi + + ret=$? + [[ $ret -ne 0 ]] && max_size=$((i - 1)) && break + done + + # We expect to exceed the maximum number of ACLs in a group, so that + # insertion eventually fails. Otherwise, the test should be adjusted to + # add more filters. + check_fail $ret "expected to exceed number of ACLs in a group" + + for ((; i >= 1; i--)); do + if [[ $(( i % 2 )) == 1 ]]; then + tc filter del dev $h2 ingress pref $i proto ipv4 \ + flower $tcflags dst_ip 198.51.100.1/32 \ + ip_proto tcp tcp_flags 0x01/0x01 \ + action drop &> /dev/null + else + tc filter del dev $h2 ingress pref $i proto ipv6 \ + flower $tcflags dst_ip 2001:db8:1::1/128 \ + action drop &> /dev/null + fi + done + + log_test "max ACL group size test ($tcflags). max size $max_size" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 1b08e042cf94..185b02d2d4cd 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -269,6 +269,7 @@ for port in 0 1; do echo 1 > $NSIM_DEV_SYS/new_port fi NSIM_NETDEV=`get_netdev_name old_netdevs` + ifconfig $NSIM_NETDEV up msg="new NIC device created" exp0=( 0 0 0 0 ) @@ -430,6 +431,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up overflow_table0 "overflow NIC table" @@ -487,6 +489,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up overflow_table0 "overflow NIC table" @@ -543,6 +546,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up overflow_table0 "destroy NIC" @@ -572,6 +576,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up msg="create VxLANs v6" @@ -632,6 +637,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error @@ -687,6 +693,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up msg="create VxLANs v6" @@ -746,6 +753,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up msg="create VxLANs v6" @@ -876,6 +884,7 @@ msg="re-add a port" echo 2 > $NSIM_DEV_SYS/del_port echo 2 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` check_tables msg="replace VxLAN in overflow table" diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 265b6882cc21..b5e3a3aad4bf 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -1,3 +1,5 @@ +CONFIG_DUMMY=y +CONFIG_IPV6=y +CONFIG_MACVLAN=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y -CONFIG_MACVLAN=y diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index b5234d6efbe1..ec40a33c29fd 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -226,13 +226,15 @@ static void clear_log_create_vm_done(struct kvm_vm *vm) } static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); } static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages); @@ -329,10 +331,9 @@ static void dirty_ring_continue_vcpu(void) } static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { - /* We only have one vcpu */ - static uint32_t fetch_index = 0; uint32_t count = 0, cleared; bool continued_vcpu = false; @@ -349,11 +350,15 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), - slot, bitmap, num_pages, &fetch_index); + slot, bitmap, num_pages, + ring_buf_idx); cleared = kvm_vm_reset_dirty_ring(vcpu->vm); - /* Cleared pages should be the same as collected */ + /* + * Cleared pages should be the same as collected, as KVM is supposed to + * clear only the entries that have been harvested. + */ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); @@ -392,12 +397,6 @@ static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) } } -static void dirty_ring_before_vcpu_join(void) -{ - /* Kick another round of vcpu just to make sure it will quit */ - sem_post(&sem_vcpu_cont); -} - struct log_mode { const char *name; /* Return true if this mode is supported, otherwise false */ @@ -406,10 +405,10 @@ struct log_mode { void (*create_vm_done)(struct kvm_vm *vm); /* Hook to collect the dirty pages into the bitmap provided */ void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages); + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); - void (*before_vcpu_join) (void); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -428,7 +427,6 @@ struct log_mode { .supported = dirty_ring_supported, .create_vm_done = dirty_ring_create_vm_done, .collect_dirty_pages = dirty_ring_collect_dirty_pages, - .before_vcpu_join = dirty_ring_before_vcpu_join, .after_vcpu_run = dirty_ring_after_vcpu_run, }, }; @@ -471,13 +469,14 @@ static void log_mode_create_vm_done(struct kvm_vm *vm) } static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { struct log_mode *mode = &log_modes[host_log_mode]; TEST_ASSERT(mode->collect_dirty_pages != NULL, "collect_dirty_pages() is required for any log mode!"); - mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages); + mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx); } static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) @@ -488,14 +487,6 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) mode->after_vcpu_run(vcpu, ret, err); } -static void log_mode_before_vcpu_join(void) -{ - struct log_mode *mode = &log_modes[host_log_mode]; - - if (mode->before_vcpu_join) - mode->before_vcpu_join(); -} - static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -696,6 +687,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vcpu *vcpu; struct kvm_vm *vm; unsigned long *bmap; + uint32_t ring_buf_idx = 0; + int sem_val; if (!log_mode_supported()) { print_skip("Log mode '%s' not supported", @@ -767,10 +760,21 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Start the iterations */ iteration = 1; sync_global_to_guest(vm, iteration); - host_quit = false; + WRITE_ONCE(host_quit, false); host_dirty_count = 0; host_clear_count = 0; host_track_next_count = 0; + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + + /* + * Ensure the previous iteration didn't leave a dangling semaphore, i.e. + * that the main task and vCPU worker were synchronized and completed + * verification of all iterations. + */ + sem_getvalue(&sem_vcpu_stop, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + sem_getvalue(&sem_vcpu_cont, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); @@ -778,7 +782,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Give the vcpu thread some time to dirty some pages */ usleep(p->interval * 1000); log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, - bmap, host_num_pages); + bmap, host_num_pages, + &ring_buf_idx); /* * See vcpu_sync_stop_requested definition for details on why @@ -796,15 +801,21 @@ static void run_test(enum vm_guest_mode mode, void *arg) assert(host_log_mode == LOG_MODE_DIRTY_RING || atomic_read(&vcpu_sync_stop_requested) == false); vm_dirty_log_verify(mode, bmap); - sem_post(&sem_vcpu_cont); - iteration++; + /* + * Set host_quit before sem_vcpu_cont in the final iteration to + * ensure that the vCPU worker doesn't resume the guest. As + * above, the dirty ring test may stop and wait even when not + * explicitly request to do so, i.e. would hang waiting for a + * "continue" if it's allowed to resume the guest. + */ + if (++iteration == p->iterations) + WRITE_ONCE(host_quit, true); + + sem_post(&sem_vcpu_cont); sync_global_to_guest(vm, iteration); } - /* Tell the vcpu thread to quit */ - host_quit = true; - log_mode_before_vcpu_join(); pthread_join(vcpu_thread, NULL); pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " diff --git a/tools/testing/selftests/mqueue/setting b/tools/testing/selftests/mqueue/setting new file mode 100644 index 000000000000..a953c96aa16e --- /dev/null +++ b/tools/testing/selftests/mqueue/setting @@ -0,0 +1 @@ +timeout=180 diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh index 330d0b1ceced..c921750ca118 100755 --- a/tools/testing/selftests/net/cmsg_ipv6.sh +++ b/tools/testing/selftests/net/cmsg_ipv6.sh @@ -91,7 +91,7 @@ for ovr in setsock cmsg both diff; do check_result $? 0 "TCLASS $prot $ovr - pass" while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p u $TGT6 1234 + $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 done tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null @@ -128,7 +128,7 @@ for ovr in setsock cmsg both diff; do check_result $? 0 "HOPLIMIT $prot $ovr - pass" while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p u $TGT6 1234 + $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 done tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index bd89198cd817..ec097f245726 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -13,6 +13,7 @@ CONFIG_IPV6_VTI=y CONFIG_DUMMY=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=y +CONFIG_GENEVE=m CONFIG_IFB=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 697994a9278b..8d7a1a004b7c 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -6,14 +6,49 @@ CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y +CONFIG_DUMMY=m +CONFIG_IPV6=y +CONFIG_IPV6_GRE=m +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y +CONFIG_IPV6_PIMSM_V2=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_MACVLAN=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_MPLS=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_SAMPLE=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_VLAN=m CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_META=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPIP=m +CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_ACT_GACT=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_TC_SKB_EXT=y +CONFIG_NET_TEAM=y +CONFIG_NET_TEAM_MODE_LOADBALANCE=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_TABLES=m CONFIG_VETH=m CONFIG_NAMESPACES=y CONFIG_NET_NS=y +CONFIG_VXLAN=m +CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh index ac97f07e5ce8..bd3f7d492af2 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh @@ -354,7 +354,7 @@ __ping_ipv4() # Send 100 packets and verify that at least 100 packets hit the rule, # to overcome ARP noise. - PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip + PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip check_err $? "Ping failed" tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100 @@ -410,7 +410,7 @@ __ping_ipv6() # Send 100 packets and verify that at least 100 packets hit the rule, # to overcome neighbor discovery noise. - PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip + PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip check_err $? "Ping failed" tc_check_at_least_x_packets "dev $rp1 egress" 101 100 diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh index d880df89bc8b..e83fde79f40d 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh @@ -457,7 +457,7 @@ __ping_ipv4() # Send 100 packets and verify that at least 100 packets hit the rule, # to overcome ARP noise. - PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip + PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip check_err $? "Ping failed" tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100 @@ -522,7 +522,7 @@ __ping_ipv6() # Send 100 packets and verify that at least 100 packets hit the rule, # to overcome neighbor discovery noise. - PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip + PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip check_err $? "Ping failed" tc_check_at_least_x_packets "dev $rp1 egress" 101 100 diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index e317c2e44dae..4f80014cae49 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -22,8 +22,11 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SOCKET=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IP6_NF_FILTER=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_ACT=y diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 400cf1ce96e3..3df4a8103c76 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -56,7 +56,7 @@ __chk_nr() echo "[ skip ] Feature probably not supported" else echo "[ fail ] expected $expected found $nr" - ret=$test_cnt + ret=${KSFT_FAIL} fi else echo "[ ok ]" @@ -100,10 +100,10 @@ wait_msk_nr() printf "%-50s" "$msg" if [ $i -ge $timeout ]; then echo "[ fail ] timeout while expecting $expected max $max last $nr" - ret=$test_cnt + ret=${KSFT_FAIL} elif [ $nr != $expected ]; then echo "[ fail ] expected $expected found $nr" - ret=$test_cnt + ret=${KSFT_FAIL} else echo "[ ok ]" fi diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2107579e2939..a20dca9d26d6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -144,6 +144,11 @@ check_tools() exit $ksft_skip fi + if ! ss -h | grep -q MPTCP; then + echo "SKIP: ss tool does not support MPTCP" + exit $ksft_skip + fi + # Use the legacy version if available to support old kernel versions if iptables-legacy -V &> /dev/null; then iptables="iptables-legacy" diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings index 79b65bdf05db..abc5648b59ab 100644 --- a/tools/testing/selftests/net/mptcp/settings +++ b/tools/testing/selftests/net/mptcp/settings @@ -1 +1 @@ -timeout=1200 +timeout=1800 diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 4a417f9d51d6..06ad0510469e 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -301,10 +301,10 @@ done setup run_test 10 10 0 0 "balanced bwidth" -run_test 10 10 1 50 "balanced bwidth with unbalanced delay" +run_test 10 10 1 25 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -run_test 30 10 0 0 "unbalanced bwidth" -run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" -run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +run_test 10 3 0 0 "unbalanced bwidth" +run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay" +run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay" exit $ret diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 0d705fdcf3b7..00ab4c6e4044 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -713,23 +713,23 @@ setup_xfrm6() { } setup_xfrm4udp() { - setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 4 4500 + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 } setup_xfrm6udp() { - setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 6 4500 + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 } setup_xfrm4udprouted() { - setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 4 4500 + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 } setup_xfrm6udprouted() { - setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 6 4500 + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 } setup_routing_old() { @@ -1928,6 +1928,13 @@ check_command() { return 0 } +check_running() { + pid=${1} + cmd=${2} + + [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ] +} + test_cleanup_vxlanX_exception() { outer="${1}" encap="vxlan" @@ -1958,11 +1965,12 @@ test_cleanup_vxlanX_exception() { ${ns_a} ip link del dev veth_A-R1 & iplink_pid=$! - sleep 1 - if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then - err " can't delete veth device in a timely manner, PMTU dst likely leaked" - return 1 - fi + for i in $(seq 1 20); do + check_running ${iplink_pid} "iplinkdeldevveth_A-R1" || return 0 + sleep 0.1 + done + err " can't delete veth device in a timely manner, PMTU dst likely leaked" + return 1 } test_cleanup_ipv6_exception() { diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh index 1003ddf7b3b2..227fd1076f21 100644 --- a/tools/testing/selftests/net/setup_veth.sh +++ b/tools/testing/selftests/net/setup_veth.sh @@ -8,7 +8,7 @@ setup_veth_ns() { local -r ns_mac="$4" [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" + echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 ip -netns "${ns_name}" link set dev "${ns_dev}" up diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5b80fb155d54..d89ee6e1926c 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -926,12 +926,12 @@ TEST_F(tls, recv_partial) memset(recv_mem, 0, sizeof(recv_mem)); EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), - MSG_WAITALL), -1); + EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_first), + MSG_WAITALL), strlen(test_str_first)); EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0); memset(recv_mem, 0, sizeof(recv_mem)); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), - MSG_WAITALL), -1); + EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_second), + MSG_WAITALL), strlen(test_str_second)); EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)), 0); } diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index c079565add39..9690a5d7ffd7 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -37,6 +37,10 @@ create_ns() { for ns in $NS_SRC $NS_DST; do ip netns add $ns ip -n $ns link set dev lo up + + # disable route solicitations to decrease 'noise' traffic + ip netns exec $ns sysctl -qw net.ipv6.conf.default.router_solicitations=0 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.router_solicitations=0 done ip link add name veth$SRC type veth peer name veth$DST @@ -78,6 +82,12 @@ create_vxlan_pair() { create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6 ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad done + + # preload neighbur cache, do avoid some noisy traffic + local addr_dst=$(ip -j -n $BASE$DST link show dev vxlan6$DST |jq -r '.[]["address"]') + local addr_src=$(ip -j -n $BASE$SRC link show dev vxlan6$SRC |jq -r '.[]["address"]') + ip -n $BASE$DST neigh add dev vxlan6$DST lladdr $addr_src $OL_NET_V6$SRC + ip -n $BASE$SRC neigh add dev vxlan6$SRC lladdr $addr_dst $OL_NET_V6$DST } is_ipv6() { @@ -117,7 +127,7 @@ run_test() { # not enable GRO ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789 ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000 - ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args & + ip netns exec $NS_DST ./udpgso_bench_rx -C 2000 -R 100 -n 10 -l 1300 $rx_args & local spid=$! sleep 0.1 ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst @@ -166,7 +176,7 @@ run_bench() { # bind the sender and the receiver to different CPUs to try # get reproducible results ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus" - ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 & + ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 2000 -R 100 & local spid=$! sleep 0.1 ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index f35a924d4a30..1cbadd267c96 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -375,7 +375,7 @@ static void do_recv(void) do_flush_udp(fd); tnow = gettimeofday_ms(); - if (tnow > treport) { + if (!cfg_expected_pkt_nr && tnow > treport) { if (packets) fprintf(stderr, "%s rx: %6lu MB/s %8lu calls/s\n", diff --git a/tools/testing/selftests/sgx/test_encl.lds b/tools/testing/selftests/sgx/test_encl.lds index a1ec64f7d91f..108bc11d1d8c 100644 --- a/tools/testing/selftests/sgx/test_encl.lds +++ b/tools/testing/selftests/sgx/test_encl.lds @@ -34,8 +34,4 @@ SECTIONS } } -ASSERT(!DEFINED(.altinstructions), "ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.altinstr_replacement), "ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.discard.retpoline_safe), "RETPOLINE ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.discard.nospec), "RETPOLINE ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.got.plt), "Libcalls are not supported in enclaves") +ASSERT(!DEFINED(_GLOBAL_OFFSET_TABLE_), "Libcalls through GOT are not supported in enclaves") diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json deleted file mode 100644 index f5bc8670a67d..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json +++ /dev/null @@ -1,94 +0,0 @@ -[ - { - "id": "7628", - "name": "Create ATM with default setting", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "390a", - "name": "Delete ATM with valid handle", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root atm" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "32a0", - "name": "Show ATM class", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class atm 1: parent 1:", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "6310", - "name": "Dump ATM stats", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json deleted file mode 100644 index 1ab21c83a122..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json +++ /dev/null @@ -1,184 +0,0 @@ -[ - { - "id": "3460", - "name": "Create CBQ with default setting", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "0592", - "name": "Create CBQ with mpu", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 mpu 1000", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4684", - "name": "Create CBQ with valid cell num", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 128", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4345", - "name": "Create CBQ with invalid cell num", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 100", - "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4525", - "name": "Create CBQ with valid ewma", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 16", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "6784", - "name": "Create CBQ with invalid ewma", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 128", - "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "5468", - "name": "Delete CBQ with handle", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "492a", - "name": "Show CBQ class", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class cbq 1: root rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json deleted file mode 100644 index c030795f9c37..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json +++ /dev/null @@ -1,140 +0,0 @@ -[ - { - "id": "6345", - "name": "Create DSMARK with default setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "3462", - "name": "Create DSMARK with default_index setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 512", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0200", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "ca95", - "name": "Create DSMARK with set_tc_index flag", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 set_tc_index", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 set_tc_index", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "a950", - "name": "Create DSMARK with multiple setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024 set_tc_index", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0400 set_tc_index", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4092", - "name": "Delete DSMARK with handle", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "5930", - "name": "Show DSMARK class", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class dsmark 1:", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh index 0899019a7fcb..e14bdd4455f2 100644 --- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Kselftest framework requirement - SKIP code is 4. diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 0d85be2350fa..a81165930785 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -470,7 +470,7 @@ static int ksm_merge_hugepages_time(int mapping, int prot, int timeout, size_t m if (map_ptr_orig == MAP_FAILED) err(2, "initial mmap"); - if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE)) + if (madvise(map_ptr, len, MADV_HUGEPAGE)) err(2, "MADV_HUGEPAGE"); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index 312889edb84a..c65c55b7a789 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -15,6 +15,7 @@ #include #include #include +#include "vm_util.h" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -70,10 +71,16 @@ int main(int argc, char **argv) { void *addr; int ret; + size_t hugepage_size; size_t length = LENGTH; int flags = FLAGS; int shift = 0; + hugepage_size = default_huge_page_size(); + /* munmap with fail if the length is not page aligned */ + if (hugepage_size > length) + length = hugepage_size; + if (argc > 1) length = atol(argv[1]) << 20; if (argc > 2) { diff --git a/tools/testing/selftests/vm/va_128TBswitch.sh b/tools/testing/selftests/vm/va_128TBswitch.sh index 41580751dc51..231622b3a232 100755 --- a/tools/testing/selftests/vm/va_128TBswitch.sh +++ b/tools/testing/selftests/vm/va_128TBswitch.sh @@ -29,9 +29,15 @@ check_supported_x86_64() # See man 1 gzip under '-f'. local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;} + else {print 1}; exit}' /proc/cpuinfo 2>/dev/null) + if [[ "${pg_table_levels}" -lt 5 ]]; then echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" exit $ksft_skip + elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then + echo "$0: CPU does not have the necessary la57 flag to support page table level 5" + exit $ksft_skip fi } diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh index 70a02301f4c2..3d2d2eb9d6ff 100644 --- a/tools/testing/selftests/vm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 set -e diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index 22e28b76f800..6912e9577b65 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -28,10 +28,15 @@ FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ -fasynchronous-unwind-tables -fstack-clash-protection WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized +ifeq ($(CC),clang) + FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS)) + WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS)) +endif + TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) -LDFLAGS := -ggdb $(EXTRA_LDFLAGS) +LDFLAGS := -flto=auto -ggdb $(EXTRA_LDFLAGS) LIBS := $$($(PKG_CONFIG) --libs libtracefs) SRC := $(wildcard src/*.c) diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index fe34452fc4ec..b9658f213cb5 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -129,8 +129,7 @@ static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu, if (params->output_divisor) duration = duration / params->output_divisor; - if (data->bucket_size) - bucket = duration / data->bucket_size; + bucket = duration / data->bucket_size; total_duration = duration * count; @@ -472,7 +471,11 @@ static void osnoise_hist_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 76479bfb2922..6c07f360de72 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -282,7 +282,11 @@ void osnoise_top_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 4b48af8a8309..ed08295bfa12 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -151,8 +151,7 @@ timerlat_hist_update(struct osnoise_tool *tool, int cpu, if (params->output_divisor) latency = latency / params->output_divisor; - if (data->bucket_size) - bucket = latency / data->bucket_size; + bucket = latency / data->bucket_size; if (!thread) { hist = data->hist[cpu].irq; @@ -475,7 +474,11 @@ static void timerlat_hist_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 334271935222..8fc0f6aa19da 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -305,7 +305,11 @@ static void timerlat_top_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index 663a047f794d..8c8d63c7196c 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -243,12 +243,6 @@ static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, return syscall(__NR_sched_setattr, pid, attr, flags); } -static inline int sched_getattr(pid_t pid, struct sched_attr *attr, - unsigned int size, unsigned int flags) -{ - return syscall(__NR_sched_getattr, pid, attr, size, flags); -} - int __set_sched_attr(int pid, struct sched_attr *attr) { int flags = 0; @@ -484,13 +478,13 @@ int parse_prio(char *arg, struct sched_attr *sched_param) if (prio == INVALID_VAL) return -1; - if (prio < sched_get_priority_min(SCHED_OTHER)) + if (prio < MIN_NICE) return -1; - if (prio > sched_get_priority_max(SCHED_OTHER)) + if (prio > MAX_NICE) return -1; sched_param->sched_policy = SCHED_OTHER; - sched_param->sched_priority = prio; + sched_param->sched_nice = prio; break; default: return -1; diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h index 5571afd3b549..92da41aaf4c4 100644 --- a/tools/tracing/rtla/src/utils.h +++ b/tools/tracing/rtla/src/utils.h @@ -7,6 +7,8 @@ */ #define BUFF_U64_STR_SIZE 24 #define MAX_PATH 1024 +#define MAX_NICE 20 +#define MIN_NICE -19 #define container_of(ptr, type, member)({ \ const typeof(((type *)0)->member) *__mptr = (ptr); \ diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 9bfe1d6f6529..adaf6f141804 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -88,7 +88,27 @@ static void async_pf_execute(struct work_struct *work) __kvm_vcpu_wake_up(vcpu); mmput(mm); - kvm_put_kvm(vcpu->kvm); +} + +static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work) +{ + /* + * The async #PF is "done", but KVM must wait for the work item itself, + * i.e. async_pf_execute(), to run to completion. If KVM is a module, + * KVM must ensure *no* code owned by the KVM (the module) can be run + * after the last call to module_put(). Note, flushing the work item + * is always required when the item is taken off the completion queue. + * E.g. even if the vCPU handles the item in the "normal" path, the VM + * could be terminated before async_pf_execute() completes. + * + * Wake all events skip the queue and go straight done, i.e. don't + * need to be flushed (but sanity check that the work wasn't queued). + */ + if (work->wakeup_all) + WARN_ON_ONCE(work->work.func); + else + flush_work(&work->work); + kmem_cache_free(async_pf_cache, work); } void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) @@ -115,7 +135,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) #else if (cancel_work_sync(&work->work)) { mmput(work->mm); - kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); } #endif @@ -127,7 +146,10 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) list_first_entry(&vcpu->async_pf.done, typeof(*work), link); list_del(&work->link); - kmem_cache_free(async_pf_cache, work); + + spin_unlock(&vcpu->async_pf.lock); + kvm_flush_and_free_async_pf_work(work); + spin_lock(&vcpu->async_pf.lock); } spin_unlock(&vcpu->async_pf.lock); @@ -152,7 +174,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) list_del(&work->queue); vcpu->async_pf.queued--; - kmem_cache_free(async_pf_cache, work); + kvm_flush_and_free_async_pf_work(work); } } @@ -187,7 +209,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, work->arch = *arch; work->mm = current->mm; mmget(work->mm); - kvm_get_kvm(work->vcpu->kvm); INIT_WORK(&work->work, async_pf_execute);