This is the 6.1.115 stable release

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmckJzMACgkQONu9yGCS
 aT7HOQ/8DFCJv+qHFaOujVD60FxOuNegbm3NDqZrFDQatuizL908Ernfo1I4LHgr
 YZwHPlPn1O45k4QgdM2cJyLNS8kxI1aRbck6/3a1j/RxWMBeIepmUYPnzNKIgQCS
 Z1mog42B7c5UbZVhSC21HmVZl6d0buAoHDNMG8XEM1brBf2MDcoy071nH0w7oQLl
 OSNKRkC0ED7w6qrfT9q2ZwrZm1MntxDHalm7a0KOngfQYAY61jr03p7H8gJ4qeA4
 rNMBj0AQch+clYrEvWWX3SO6QMAXfgqXcLglvy2xQJ5ZLaZie+sfokfyGOcNB1t6
 ANeYVNjU2g7moz7EuZ+iutiBlOmRuv4rpa93QwFNQFJxfpRB4avUWs7uNtyYSbX/
 tJqfIJ9Hpepjd6czv0tTw5CnnpiwKNWMSZkxaNp9r4I+CDVv5/+eCO9os/MF/hdT
 2+m1Q/BN6HWRQceUAulLGguLitTAPlRNa06psci1lVc4y0Eb1S5eSEsR0S+be1qR
 xgViV6I2ZPClkFP+tKpNbRCsgkVSiwrVR/TmNm2VQnQql7+ZWsufXHAMzPgmPpV/
 6EOsBFdD6piC6wMLx6YVg2sHLGIq0/z5v4PNqhRRkQvb0iL6PwXmYzABr9CUse0I
 Zp9nvZhKnjBwuM+KoYp+FCwC9Bm4eDTG6dRTXBhyozB4tMAJl5c=
 =9IYd
 -----END PGP SIGNATURE-----

Merge tag 'v6.1.115'

This is the 6.1.115 stable release

* tag 'v6.1.115': (2780 commits)
  Linux 6.1.115
  xfrm: validate new SA's prefixlen using SA family when sel.family is unset
  arm64/uprobes: change the uprobe_opcode_t typedef to fix the sparse warning
  ACPI: PRM: Clean up guid type in struct prm_handler_info
  platform/x86: dell-wmi: Ignore suspend notifications
  ASoC: qcom: Fix NULL Dereference in asoc_qcom_lpass_cpu_platform_probe()
  net: phy: dp83822: Fix reset pin definitions
  serial: protect uart_port_dtr_rts() in uart_shutdown() too
  selinux: improve error checking in sel_write_load()
  drm/amd/display: Disable PSR-SU on Parade 08-01 TCON too
  hv_netvsc: Fix VF namespace also in synthetic NIC NETDEV_REGISTER event
  xfrm: fix one more kernel-infoleak in algo dumping
  LoongArch: Get correct cores_per_package for SMT systems
  ALSA: hda/realtek: Add subwoofer quirk for Acer Predator G9-593
  KVM: arm64: Don't eagerly teardown the vgic on init error
  KVM: nSVM: Ignore nCR3[4:0] when loading PDPTEs from memory
  openat2: explicitly return -E2BIG for (usize > PAGE_SIZE)
  nilfs2: fix kernel bug due to missing clearing of buffer delay flag
  ACPI: button: Add DMI quirk for Samsung Galaxy Book2 to fix initial lid detection issue
  ACPI: PRM: Find EFI_MEMORY_RUNTIME block for PRM handler and context
  ...

Change-Id: Iee600c49a5c914b79141c62cda38e787e429a167

Conflicts:
	arch/arm64/boot/dts/rockchip/rk356x.dtsi
	drivers/gpio/gpio-rockchip.c
	drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
	drivers/gpu/drm/rockchip/rockchip_drm_vop.c
	drivers/gpu/drm/rockchip/rockchip_drm_vop.h
	drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
	drivers/gpu/drm/rockchip/rockchip_vop_reg.c
	drivers/media/i2c/imx335.c
	drivers/pci/controller/dwc/pcie-dw-rockchip.c
	drivers/spi/spi-rockchip.c
	drivers/spi/spidev.c
	drivers/usb/dwc3/gadget.c
	drivers/usb/host/xhci.h
Tao Huang 2025-01-10 17:48:30 +08:00
commit 96900fe2fe
2200 changed files with 33639 additions and 21718 deletions


@ -132,12 +132,15 @@ config ZSWAP_ZPOOL_DEFAULT_ZBUD
help
Use the zbud allocator as the default allocator.
config ZSWAP_ZPOOL_DEFAULT_Z3FOLD
bool "z3fold"
select Z3FOLD
config ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED
bool "z3foldi (DEPRECATED)"
select Z3FOLD_DEPRECATED
help
Use the z3fold allocator as the default allocator.
Deprecated and scheduled for removal in a few cycles,
see CONFIG_Z3FOLD_DEPRECATED.
config ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
bool "zsmalloc"
select ZSMALLOC
@ -149,7 +152,7 @@ config ZSWAP_ZPOOL_DEFAULT
string
depends on ZSWAP
default "zbud" if ZSWAP_ZPOOL_DEFAULT_ZBUD
default "z3fold" if ZSWAP_ZPOOL_DEFAULT_Z3FOLD
default "z3fold" if ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED
default "zsmalloc" if ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
default ""
@ -163,15 +166,25 @@ config ZBUD
deterministic reclaim properties that make it preferable to a higher
density approach when reclaim will be used.
config Z3FOLD
tristate "3:1 compression allocator (z3fold)"
config Z3FOLD_DEPRECATED
tristate "3:1 compression allocator (z3fold) (DEPRECATED)"
depends on ZSWAP
help
Deprecated and scheduled for removal in a few cycles. If you have
a good reason for using Z3FOLD over ZSMALLOC, please contact
linux-mm@kvack.org and the zswap maintainers.
A special purpose allocator for storing compressed pages.
It is designed to store up to three compressed pages per physical
page. It is a ZBUD derivative so the simplicity and determinism are
still there.
config Z3FOLD
tristate
default y if Z3FOLD_DEPRECATED=y
default m if Z3FOLD_DEPRECATED=m
depends on Z3FOLD_DEPRECATED
config ZSMALLOC
tristate
prompt "N:1 compression allocator (zsmalloc)" if ZSWAP
@ -632,6 +645,17 @@ config HUGETLB_PAGE_SIZE_VARIABLE
config CONTIG_ALLOC
def_bool (MEMORY_ISOLATION && COMPACTION) || CMA
config PCP_BATCH_SCALE_MAX
int "Maximum scale factor of PCP (Per-CPU pageset) batch allocate/free"
default 5
range 0 6
help
In page allocator, PCP (Per-CPU pageset) is refilled and drained in
batches. The batch number is scaled automatically to improve page
allocation/free throughput. But too large scale factor may hurt
latency. This option sets the upper limit of scale factor to limit
the maximum latency.
config PHYS_ADDR_T_64BIT
def_bool 64BIT
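
For illustration only: a minimal, standalone C sketch of the batch-capping idea behind the new PCP_BATCH_SCALE_MAX option above (free at most batch << scale pages per pass, so a large drain never becomes one long critical section). The names and numbers below are invented stand-ins, not kernel APIs; only the shift-based cap mirrors the Kconfig text.

#include <stdio.h>

#define BATCH           63      /* assumed per-CPU batch size */
#define BATCH_SCALE_MAX 5       /* matches the Kconfig default above */

/* Free 'count' cached pages, at most BATCH << BATCH_SCALE_MAX per pass. */
static void drain_pages(int count)
{
    while (count) {
        int cap = BATCH << BATCH_SCALE_MAX;
        int to_drain = count < cap ? count : cap;

        /* lock(); free to_drain pages; unlock(); -- bounded hold time */
        printf("drained %d, %d left\n", to_drain, count - to_drain);
        count -= to_drain;
    }
}

int main(void)
{
    drain_pages(10000);
    return 0;
}

The same cap appears later in this merge in drain_pages_zone() and nr_pcp_free(), where pcp->batch is shifted by CONFIG_PCP_BATCH_SCALE_MAX.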


@ -921,14 +921,31 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
* access frequencies are similar. This is for minimizing the monitoring
* overhead under the dynamically changeable access pattern. If a merge was
* unnecessarily made, later 'kdamond_split_regions()' will revert it.
*
* The total number of regions could be higher than the user-defined limit,
* max_nr_regions for some cases. For example, the user can update
* max_nr_regions to a number that lower than the current number of regions
* while DAMON is running. For such a case, repeat merging until the limit is
* met while increasing @threshold up to possible maximum level.
*/
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
unsigned long sz_limit)
{
struct damon_target *t;
unsigned int nr_regions;
unsigned int max_thres;
damon_for_each_target(t, c)
damon_merge_regions_of(t, threshold, sz_limit);
max_thres = c->attrs.aggr_interval /
(c->attrs.sample_interval ? c->attrs.sample_interval : 1);
do {
nr_regions = 0;
damon_for_each_target(t, c) {
damon_merge_regions_of(t, threshold, sz_limit);
nr_regions += damon_nr_regions(t);
}
threshold = max(1, threshold * 2);
} while (nr_regions > c->attrs.max_nr_regions &&
threshold / 2 < max_thres);
}
/*

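For illustration only: a standalone C sketch of the retry-with-doubled-threshold loop that the new kdamond_merge_regions() comment above describes (keep merging, doubling the threshold, until the region count fits the limit or the threshold hits its maximum). The region model and merge rule here are invented for the example; only the loop shape corresponds to the kernel change.

#include <stdio.h>

/* Merge neighbours that differ by at most 'threshold'; return the new count. */
static int merge_pass(int *vals, int n, int threshold)
{
    int out = 0;

    for (int i = 0; i < n; i++) {
        if (out && vals[i] - vals[out - 1] <= threshold)
            continue;       /* absorbed into the previous region */
        vals[out++] = vals[i];
    }
    return out;
}

int main(void)
{
    int vals[] = { 1, 3, 4, 9, 10, 11, 20, 22, 40, 80 };
    int n = sizeof(vals) / sizeof(vals[0]);
    int threshold = 1, max_regions = 3, max_thres = 64;

    do {
        n = merge_pass(vals, n, threshold);
        printf("threshold %d -> %d regions\n", threshold, n);
        threshold *= 2;     /* the kernel loop doubles the threshold the same way */
    } while (n > max_regions && threshold / 2 < max_thres);
    return 0;
}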

@ -126,6 +126,7 @@ static int __damon_va_three_regions(struct mm_struct *mm,
* If this is too slow, it can be optimised to examine the maple
* tree gaps.
*/
rcu_read_lock();
for_each_vma(vmi, vma) {
unsigned long gap;
@ -146,6 +147,7 @@ static int __damon_va_three_regions(struct mm_struct *mm,
next:
prev = vma;
}
rcu_read_unlock();
if (!sz_range(&second_gap) || !sz_range(&first_gap))
return -EINVAL;


@ -841,6 +841,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
{
XA_STATE(xas, &mapping->i_pages, index);
int huge = folio_test_hugetlb(folio);
void *alloced_shadow = NULL;
int alloced_order = 0;
bool charged = false;
long nr = 1;
@ -863,13 +865,10 @@ noinline int __filemap_add_folio(struct address_space *mapping,
folio->mapping = mapping;
folio->index = xas.xa_index;
do {
unsigned int order = xa_get_order(xas.xa, xas.xa_index);
for (;;) {
int order = -1, split_order = 0;
void *entry, *old = NULL;
if (order > folio_order(folio))
xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
order, gfp);
xas_lock_irq(&xas);
xas_for_each_conflict(&xas, entry) {
old = entry;
@ -877,19 +876,33 @@ noinline int __filemap_add_folio(struct address_space *mapping,
xas_set_err(&xas, -EEXIST);
goto unlock;
}
/*
* If a larger entry exists,
* it will be the first and only entry iterated.
*/
if (order == -1)
order = xas_get_order(&xas);
}
/* entry may have changed before we re-acquire the lock */
if (alloced_order && (old != alloced_shadow || order != alloced_order)) {
xas_destroy(&xas);
alloced_order = 0;
}
if (old) {
if (shadowp)
*shadowp = old;
/* entry may have been split before we acquired lock */
order = xa_get_order(xas.xa, xas.xa_index);
if (order > folio_order(folio)) {
if (order > 0 && order > folio_order(folio)) {
/* How to handle large swap entries? */
BUG_ON(shmem_mapping(mapping));
if (!alloced_order) {
split_order = order;
goto unlock;
}
xas_split(&xas, old, order);
xas_reset(&xas);
}
if (shadowp)
*shadowp = old;
}
xas_store(&xas, folio);
@ -905,9 +918,24 @@ noinline int __filemap_add_folio(struct address_space *mapping,
__lruvec_stat_mod_folio(folio,
NR_FILE_THPS, nr);
}
unlock:
xas_unlock_irq(&xas);
} while (xas_nomem(&xas, gfp));
/* split needed, alloc here and retry. */
if (split_order) {
xas_split_alloc(&xas, old, split_order, gfp);
if (xas_error(&xas))
goto error;
alloced_shadow = old;
alloced_order = split_order;
xas_reset(&xas);
continue;
}
if (!xas_nomem(&xas, gfp))
break;
}
if (xas_error(&xas))
goto error;
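
For illustration only: the __filemap_add_folio() rework above saves the allocated shadow/order, drops the xarray lock to allocate for a split, and re-validates on the next pass. Below is a minimal userspace sketch of that drop-lock/allocate/re-check/retry shape, using pthreads and malloc rather than the kernel XArray API (every name in it is invented).

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int needs_split = 1;     /* stands in for "a larger entry is in the way" */

static int insert_item(void)
{
    void *prealloc = NULL;

    for (;;) {
        pthread_mutex_lock(&lock);
        if (needs_split && !prealloc) {
            /* Cannot allocate under the lock: drop it, allocate, then retry. */
            pthread_mutex_unlock(&lock);
            prealloc = malloc(64);
            if (!prealloc)
                return -1;
            continue;       /* state may have changed; re-check under the lock */
        }
        if (needs_split)
            needs_split = 0;    /* "split" using the preallocated memory */
        /* ...store the new item... */
        pthread_mutex_unlock(&lock);
        free(prealloc);     /* nothing keeps the preallocation in this sketch */
        return 0;
    }
}

int main(void)
{
    printf("insert_item() = %d\n", insert_item());
    return 0;
}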


@ -608,7 +608,7 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
loff_t off_align = round_up(off, size);
unsigned long len_pad, ret;
if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall())
if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall())
return 0;
if (off_end <= off_align || (off_end - off_align) < size)
@ -1492,7 +1492,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
spin_unlock(vmf->ptl);
goto out;
return 0;
}
pmd = pmd_modify(oldpmd, vma->vm_page_prot);
@ -1525,23 +1525,16 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
if (migrated) {
flags |= TNF_MIGRATED;
page_nid = target_nid;
} else {
flags |= TNF_MIGRATE_FAIL;
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
spin_unlock(vmf->ptl);
goto out;
}
goto out_map;
task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
return 0;
}
out:
if (page_nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR,
flags);
return 0;
flags |= TNF_MIGRATE_FAIL;
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
spin_unlock(vmf->ptl);
return 0;
}
out_map:
/* Restore the PMD */
pmd = pmd_modify(oldpmd, vma->vm_page_prot);
@ -1551,7 +1544,10 @@ out_map:
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
spin_unlock(vmf->ptl);
goto out;
if (page_nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
return 0;
}
/*


@ -1785,13 +1785,6 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
return;
}
/*
* Move PageHWPoison flag from head page to the raw error pages,
* which makes any healthy subpages reusable.
*/
if (unlikely(PageHWPoison(page)))
hugetlb_clear_page_hwpoison(page);
/*
* If vmemmap pages were allocated above, then we need to clear the
* hugetlb destructor under the hugetlb lock.
@ -1802,6 +1795,13 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
spin_unlock_irq(&hugetlb_lock);
}
/*
* Move PageHWPoison flag from head page to the raw error pages,
* which makes any healthy subpages reusable.
*/
if (unlikely(PageHWPoison(page)))
hugetlb_clear_page_hwpoison(page);
for (i = 0; i < pages_per_huge_page(h); i++) {
subpage = nth_page(page, i);
subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
@ -4353,7 +4353,7 @@ void __init hugetlb_add_hstate(unsigned int order)
BUG_ON(hugetlb_max_hstate >= HUGE_MAX_HSTATE);
BUG_ON(order == 0);
h = &hstates[hugetlb_max_hstate++];
mutex_init(&h->resize_lock);
__mutex_init(&h->resize_lock, "resize mutex", &h->resize_key);
h->order = order;
h->mask = ~(huge_page_size(h) - 1);
for (i = 0; i < MAX_NUMNODES; ++i)


@ -55,6 +55,7 @@ enum scan_result {
SCAN_CGROUP_CHARGE_FAIL,
SCAN_TRUNCATED,
SCAN_PAGE_HAS_PRIVATE,
SCAN_STORE_FAILED,
};
#define CREATE_TRACE_POINTS
@ -1840,6 +1841,15 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
goto xa_locked;
}
xas_store(&xas, hpage);
if (xas_error(&xas)) {
/* revert shmem_charge performed
* in the previous condition
*/
mapping->nrpages--;
shmem_uncharge(mapping->host, 1);
result = SCAN_STORE_FAILED;
goto xa_locked;
}
nr_none++;
continue;
}
@ -1991,6 +2001,11 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
/* Finally, replace with the new page. */
xas_store(&xas, hpage);
/* We can't get an ENOMEM here (because the allocation happened before)
* but let's check for errors (XArray implementation can be
* changed in the future)
*/
WARN_ON_ONCE(xas_error(&xas));
continue;
out_unlock:
unlock_page(page);
@ -2028,6 +2043,11 @@ out_unlock:
/* Join all the small entries into a single multi-index entry */
xas_set_order(&xas, start, HPAGE_PMD_ORDER);
xas_store(&xas, hpage);
/* Here we can't get an ENOMEM (because entries were
* previously allocated) But let's check for errors
* (XArray implementation can be changed in the future)
*/
WARN_ON_ONCE(xas_error(&xas));
xa_locked:
xas_unlock_irq(&xas);
xa_unlocked:


@ -4857,9 +4857,12 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
buf = endp + 1;
cfd = simple_strtoul(buf, &endp, 10);
if ((*endp != ' ') && (*endp != '\0'))
if (*endp == '\0')
buf = endp;
else if (*endp == ' ')
buf = endp + 1;
else
return -EINVAL;
buf = endp + 1;
event = kzalloc(sizeof(*event), GFP_KERNEL);
if (!event)
@ -5140,11 +5143,28 @@ static struct cftype mem_cgroup_legacy_files[] = {
*/
static DEFINE_IDR(mem_cgroup_idr);
static DEFINE_SPINLOCK(memcg_idr_lock);
static int mem_cgroup_alloc_id(void)
{
int ret;
idr_preload(GFP_KERNEL);
spin_lock(&memcg_idr_lock);
ret = idr_alloc(&mem_cgroup_idr, NULL, 1, MEM_CGROUP_ID_MAX + 1,
GFP_NOWAIT);
spin_unlock(&memcg_idr_lock);
idr_preload_end();
return ret;
}
static void mem_cgroup_id_remove(struct mem_cgroup *memcg)
{
if (memcg->id.id > 0) {
spin_lock(&memcg_idr_lock);
idr_remove(&mem_cgroup_idr, memcg->id.id);
spin_unlock(&memcg_idr_lock);
memcg->id.id = 0;
}
}
@ -5267,8 +5287,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (!memcg)
return ERR_PTR(error);
memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
1, MEM_CGROUP_ID_MAX + 1, GFP_KERNEL);
memcg->id.id = mem_cgroup_alloc_id();
if (memcg->id.id < 0) {
error = memcg->id.id;
goto fail;
@ -5313,7 +5332,9 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
memcg->deferred_split_queue.split_queue_len = 0;
#endif
spin_lock(&memcg_idr_lock);
idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
spin_unlock(&memcg_idr_lock);
lru_gen_init_memcg(memcg);
return memcg;
fail:


@ -2208,7 +2208,7 @@ struct memory_failure_entry {
struct memory_failure_cpu {
DECLARE_KFIFO(fifo, struct memory_failure_entry,
MEMORY_FAILURE_FIFO_SIZE);
spinlock_t lock;
raw_spinlock_t lock;
struct work_struct work;
};
@ -2234,20 +2234,22 @@ void memory_failure_queue(unsigned long pfn, int flags)
{
struct memory_failure_cpu *mf_cpu;
unsigned long proc_flags;
bool buffer_overflow;
struct memory_failure_entry entry = {
.pfn = pfn,
.flags = flags,
};
mf_cpu = &get_cpu_var(memory_failure_cpu);
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
if (kfifo_put(&mf_cpu->fifo, entry))
raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry);
if (!buffer_overflow)
schedule_work_on(smp_processor_id(), &mf_cpu->work);
else
raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
put_cpu_var(memory_failure_cpu);
if (buffer_overflow)
pr_err("buffer overflow when queuing memory failure at %#lx\n",
pfn);
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
put_cpu_var(memory_failure_cpu);
}
EXPORT_SYMBOL_GPL(memory_failure_queue);
@ -2260,9 +2262,9 @@ static void memory_failure_work_func(struct work_struct *work)
mf_cpu = container_of(work, struct memory_failure_cpu, work);
for (;;) {
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
gotten = kfifo_get(&mf_cpu->fifo, &entry);
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
if (!gotten)
break;
if (entry.flags & MF_SOFT_OFFLINE)
@ -2292,7 +2294,7 @@ static int __init memory_failure_init(void)
for_each_possible_cpu(cpu) {
mf_cpu = &per_cpu(memory_failure_cpu, cpu);
spin_lock_init(&mf_cpu->lock);
raw_spin_lock_init(&mf_cpu->lock);
INIT_KFIFO(mf_cpu->fifo);
INIT_WORK(&mf_cpu->work, memory_failure_work_func);
}


@ -2480,11 +2480,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
return 0;
}
/*
* Variant of remap_pfn_range that does not call track_pfn_remap. The caller
* must have pre-validated the caching bits of the pgprot_t.
*/
int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
static int remap_pfn_range_internal(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
pgd_t *pgd;
@ -2537,6 +2533,27 @@ int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
return 0;
}
/*
* Variant of remap_pfn_range that does not call track_pfn_remap. The caller
* must have pre-validated the caching bits of the pgprot_t.
*/
int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
int error = remap_pfn_range_internal(vma, addr, pfn, size, prot);
if (!error)
return 0;
/*
* A partial pfn range mapping is dangerous: it does not
* maintain page reference counts, and callers may free
* pages due to the error. So zap it early.
*/
zap_page_range_single(vma, addr, size, NULL);
return error;
}
/**
* remap_pfn_range - remap kernel memory to userspace
* @vma: user vma to map to
@ -4786,7 +4803,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
spin_lock(vmf->ptl);
if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out;
return 0;
}
/* Get the normal PTE */
@ -4841,21 +4858,17 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
if (migrate_misplaced_page(page, vma, target_nid)) {
page_nid = target_nid;
flags |= TNF_MIGRATED;
} else {
flags |= TNF_MIGRATE_FAIL;
vmf->pte = pte_offset_map(vmf->pmd, vmf->address);
spin_lock(vmf->ptl);
if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out;
}
goto out_map;
task_numa_fault(last_cpupid, page_nid, 1, flags);
return 0;
}
out:
if (page_nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, page_nid, 1, flags);
return 0;
flags |= TNF_MIGRATE_FAIL;
vmf->pte = pte_offset_map(vmf->pmd, vmf->address);
spin_lock(vmf->ptl);
if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
return 0;
}
out_map:
/*
* Make it present again, depending on how arch implements
@ -4869,7 +4882,10 @@ out_map:
ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
update_mmu_cache(vma, vmf->address, vmf->pte);
pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out;
if (page_nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, page_nid, 1, flags);
return 0;
}
static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
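
For illustration only: the remap_pfn_range_notrack() change earlier in this file zaps whatever was already mapped when the mapping fails partway, because a partial mapping without page reference counts is unsafe to leave behind. Here is a small standalone C sketch of that all-or-nothing pattern; the resources and the failure point are made up.

#include <stdio.h>
#include <stdlib.h>

static int map_one(void **slot, int i)
{
    if (i == 3)                 /* simulate a failure partway through */
        return -1;
    *slot = malloc(16);
    return *slot ? 0 : -1;
}

static int map_range(void **slots, int n)
{
    for (int i = 0; i < n; i++) {
        if (map_one(&slots[i], i) == 0)
            continue;
        /* A partial result is dangerous here: tear down what was done so far. */
        for (int j = 0; j < i; j++) {
            free(slots[j]);
            slots[j] = NULL;
        }
        return -1;
    }
    return 0;
}

int main(void)
{
    void *slots[8] = { NULL };

    printf("map_range() = %d\n", map_range(slots, 8));
    return 0;
}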


@ -1530,7 +1530,7 @@ struct range __weak arch_get_mappable_range(void)
struct range mhp_get_pluggable_range(bool need_mapping)
{
const u64 max_phys = (1ULL << MAX_PHYSMEM_BITS) - 1;
const u64 max_phys = PHYSMEM_END;
struct range mhp_range;
if (need_mapping) {


@ -3112,8 +3112,9 @@ out:
* @pol: pointer to mempolicy to be formatted
*
* Convert @pol into a string. If @buffer is too short, truncate the string.
* Recommend a @maxlen of at least 32 for the longest mode, "interleave", the
* longest flag, "relative", and to display at least a few node ids.
* Recommend a @maxlen of at least 51 for the longest mode, "weighted
* interleave", plus the longest flag flags, "relative|balancing", and to
* display at least a few node ids.
*/
void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
{
@ -3122,7 +3123,10 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
unsigned short mode = MPOL_DEFAULT;
unsigned short flags = 0;
if (pol && pol != &default_policy && !(pol->flags & MPOL_F_MORON)) {
if (pol &&
pol != &default_policy &&
!(pol >= &preferred_node_policy[0] &&
pol <= &preferred_node_policy[ARRAY_SIZE(preferred_node_policy) - 1])) {
mode = pol->mode;
flags = pol->flags;
}
@ -3149,12 +3153,18 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
p += snprintf(p, buffer + maxlen - p, "=");
/*
* Currently, the only defined flags are mutually exclusive
* Static and relative are mutually exclusive.
*/
if (flags & MPOL_F_STATIC_NODES)
p += snprintf(p, buffer + maxlen - p, "static");
else if (flags & MPOL_F_RELATIVE_NODES)
p += snprintf(p, buffer + maxlen - p, "relative");
if (flags & MPOL_F_NUMA_BALANCING) {
if (!is_power_of_2(flags & MPOL_MODE_FLAGS))
p += snprintf(p, buffer + maxlen - p, "|");
p += snprintf(p, buffer + maxlen - p, "balancing");
}
}
if (!nodes_empty(nodes))
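
For illustration only: the updated mpol_to_str() comment above recommends a buffer of at least 51 bytes, and the function builds its output with bounded "p += snprintf(p, buffer + maxlen - p, ...)" appends. Below is a quick standalone C check that the longest mode, the longest flag combination, and a few node ids fit; the mode and flag strings come from the comment, the node list is an arbitrary example.

#include <stdio.h>

int main(void)
{
    char buffer[51];
    int maxlen = sizeof(buffer);
    char *p = buffer;

    p += snprintf(p, buffer + maxlen - p, "weighted interleave");
    p += snprintf(p, buffer + maxlen - p, "=relative");
    p += snprintf(p, buffer + maxlen - p, "|balancing");
    p += snprintf(p, buffer + maxlen - p, ":0-3");
    printf("%s (length %d, buffer %d)\n", buffer, (int)(p - buffer), maxlen);
    return 0;
}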


@ -19,14 +19,7 @@ EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released);
#ifdef CONFIG_MEMCG
/*
* Our various events all share the same buffer (because we don't want or need
* to allocate a set of buffers *per event type*), so we need to protect against
* concurrent _reg() and _unreg() calls, and count how many _reg() calls have
* been made.
*/
static DEFINE_MUTEX(reg_lock);
static int reg_refcount; /* Protected by reg_lock. */
static atomic_t reg_refcount;
/*
* Size of the buffer for memcg path names. Ignoring stack trace support,
@ -34,136 +27,22 @@ static int reg_refcount; /* Protected by reg_lock. */
*/
#define MEMCG_PATH_BUF_SIZE MAX_FILTER_STR_VAL
/*
* How many contexts our trace events might be called in: normal, softirq, irq,
* and NMI.
*/
#define CONTEXT_COUNT 4
struct memcg_path {
local_lock_t lock;
char __rcu *buf;
local_t buf_idx;
};
static DEFINE_PER_CPU(struct memcg_path, memcg_paths) = {
.lock = INIT_LOCAL_LOCK(lock),
.buf_idx = LOCAL_INIT(0),
};
static char **tmp_bufs;
/* Called with reg_lock held. */
static void free_memcg_path_bufs(void)
{
struct memcg_path *memcg_path;
int cpu;
char **old = tmp_bufs;
for_each_possible_cpu(cpu) {
memcg_path = per_cpu_ptr(&memcg_paths, cpu);
*(old++) = rcu_dereference_protected(memcg_path->buf,
lockdep_is_held(&reg_lock));
rcu_assign_pointer(memcg_path->buf, NULL);
}
/* Wait for inflight memcg_path_buf users to finish. */
synchronize_rcu();
old = tmp_bufs;
for_each_possible_cpu(cpu) {
kfree(*(old++));
}
kfree(tmp_bufs);
tmp_bufs = NULL;
}
int trace_mmap_lock_reg(void)
{
int cpu;
char *new;
mutex_lock(&reg_lock);
/* If the refcount is going 0->1, proceed with allocating buffers. */
if (reg_refcount++)
goto out;
tmp_bufs = kmalloc_array(num_possible_cpus(), sizeof(*tmp_bufs),
GFP_KERNEL);
if (tmp_bufs == NULL)
goto out_fail;
for_each_possible_cpu(cpu) {
new = kmalloc(MEMCG_PATH_BUF_SIZE * CONTEXT_COUNT, GFP_KERNEL);
if (new == NULL)
goto out_fail_free;
rcu_assign_pointer(per_cpu_ptr(&memcg_paths, cpu)->buf, new);
/* Don't need to wait for inflights, they'd have gotten NULL. */
}
out:
mutex_unlock(&reg_lock);
atomic_inc(&reg_refcount);
return 0;
out_fail_free:
free_memcg_path_bufs();
out_fail:
/* Since we failed, undo the earlier ref increment. */
--reg_refcount;
mutex_unlock(&reg_lock);
return -ENOMEM;
}
void trace_mmap_lock_unreg(void)
{
mutex_lock(&reg_lock);
/* If the refcount is going 1->0, proceed with freeing buffers. */
if (--reg_refcount)
goto out;
free_memcg_path_bufs();
out:
mutex_unlock(&reg_lock);
atomic_dec(&reg_refcount);
}
static inline char *get_memcg_path_buf(void)
{
struct memcg_path *memcg_path = this_cpu_ptr(&memcg_paths);
char *buf;
int idx;
rcu_read_lock();
buf = rcu_dereference(memcg_path->buf);
if (buf == NULL) {
rcu_read_unlock();
return NULL;
}
idx = local_add_return(MEMCG_PATH_BUF_SIZE, &memcg_path->buf_idx) -
MEMCG_PATH_BUF_SIZE;
return &buf[idx];
}
static inline void put_memcg_path_buf(void)
{
local_sub(MEMCG_PATH_BUF_SIZE, &this_cpu_ptr(&memcg_paths)->buf_idx);
rcu_read_unlock();
}
#define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \
do { \
const char *memcg_path; \
local_lock(&memcg_paths.lock); \
memcg_path = get_mm_memcg_path(mm); \
trace_mmap_lock_##type(mm, \
memcg_path != NULL ? memcg_path : "", \
##__VA_ARGS__); \
if (likely(memcg_path != NULL)) \
put_memcg_path_buf(); \
local_unlock(&memcg_paths.lock); \
#define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \
do { \
char buf[MEMCG_PATH_BUF_SIZE]; \
get_mm_memcg_path(mm, buf, sizeof(buf)); \
trace_mmap_lock_##type(mm, buf, ##__VA_ARGS__); \
} while (0)
#else /* !CONFIG_MEMCG */
@ -185,37 +64,23 @@ void trace_mmap_lock_unreg(void)
#ifdef CONFIG_TRACING
#ifdef CONFIG_MEMCG
/*
* Write the given mm_struct's memcg path to a percpu buffer, and return a
* pointer to it. If the path cannot be determined, or no buffer was available
* (because the trace event is being unregistered), NULL is returned.
*
* Note: buffers are allocated per-cpu to avoid locking, so preemption must be
* disabled by the caller before calling us, and re-enabled only after the
* caller is done with the pointer.
*
* The caller must call put_memcg_path_buf() once the buffer is no longer
* needed. This must be done while preemption is still disabled.
* Write the given mm_struct's memcg path to a buffer. If the path cannot be
* determined or the trace event is being unregistered, empty string is written.
*/
static const char *get_mm_memcg_path(struct mm_struct *mm)
static void get_mm_memcg_path(struct mm_struct *mm, char *buf, size_t buflen)
{
char *buf = NULL;
struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
struct mem_cgroup *memcg;
buf[0] = '\0';
/* No need to get path if no trace event is registered. */
if (!atomic_read(&reg_refcount))
return;
memcg = get_mem_cgroup_from_mm(mm);
if (memcg == NULL)
goto out;
if (unlikely(memcg->css.cgroup == NULL))
goto out_put;
buf = get_memcg_path_buf();
if (buf == NULL)
goto out_put;
cgroup_path(memcg->css.cgroup, buf, MEMCG_PATH_BUF_SIZE);
out_put:
return;
if (memcg->css.cgroup)
cgroup_path(memcg->css.cgroup, buf, buflen);
css_put(&memcg->css);
out:
return buf;
}
#endif /* CONFIG_MEMCG */
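
For illustration only: the mmap_lock tracing rework above replaces the per-CPU, RCU-protected path buffers with a caller-provided buffer plus an atomic registration count that skips the cgroup path lookup when no trace event is registered. A tiny C11 sketch of that gate follows (userspace names; the path string is an arbitrary placeholder).

#include <stdatomic.h>
#include <stdio.h>

static atomic_int reg_refcount;

static void trace_reg(void)   { atomic_fetch_add(&reg_refcount, 1); }
static void trace_unreg(void) { atomic_fetch_sub(&reg_refcount, 1); }

/* Fill 'buf' with a path only if at least one trace event is registered. */
static void get_path(char *buf, size_t buflen)
{
    buf[0] = '\0';
    if (!atomic_load(&reg_refcount))
        return;                             /* fast path: nobody is listening */
    snprintf(buf, buflen, "/sys/fs/cgroup/example");    /* placeholder path */
}

int main(void)
{
    char buf[64];

    get_path(buf, sizeof(buf));
    printf("unregistered: \"%s\"\n", buf);
    trace_reg();
    get_path(buf, sizeof(buf));
    printf("registered:   \"%s\"\n", buf);
    trace_unreg();
    return 0;
}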


@ -3182,14 +3182,21 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
*/
static void drain_pages_zone(unsigned int cpu, struct zone *zone)
{
struct per_cpu_pages *pcp;
struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
int count;
pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
if (pcp->count) {
do {
spin_lock(&pcp->lock);
free_pcppages_bulk(zone, pcp->count, pcp, 0);
count = pcp->count;
if (count) {
int to_drain = min(count,
pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX);
free_pcppages_bulk(zone, to_drain, pcp, 0);
count -= to_drain;
}
spin_unlock(&pcp->lock);
}
} while (count);
}
/*
@ -3395,7 +3402,7 @@ static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch,
* freeing of pages without any allocation.
*/
batch <<= pcp->free_factor;
if (batch < max_nr_free)
if (batch < max_nr_free && pcp->free_factor < CONFIG_PCP_BATCH_SCALE_MAX)
pcp->free_factor++;
batch = clamp(batch, min_nr_free, max_nr_free);


@ -236,7 +236,7 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
/* make sure local flags do not confict with global fcntl.h */
BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);
if (!secretmem_enable)
if (!secretmem_enable || !can_set_direct_map())
return -ENOSYS;
if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
@ -278,7 +278,7 @@ static struct file_system_type secretmem_fs = {
static int __init secretmem_init(void)
{
if (!secretmem_enable)
if (!secretmem_enable || !can_set_direct_map())
return 0;
secretmem_mnt = kern_mount(&secretmem_fs);


@ -523,26 +523,6 @@ bool slab_is_available(void)
}
#ifdef CONFIG_PRINTK
/**
* kmem_valid_obj - does the pointer reference a valid slab object?
* @object: pointer to query.
*
* Return: %true if the pointer is to a not-yet-freed object from
* kmalloc() or kmem_cache_alloc(), either %true or %false if the pointer
* is to an already-freed object, and %false otherwise.
*/
bool kmem_valid_obj(void *object)
{
struct folio *folio;
/* Some arches consider ZERO_SIZE_PTR to be a valid address. */
if (object < (void *)PAGE_SIZE || !virt_addr_valid(object))
return false;
folio = virt_to_folio(object);
return folio_test_slab(folio);
}
EXPORT_SYMBOL_GPL(kmem_valid_obj);
static void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
if (__kfence_obj_info(kpp, object, slab))
@ -561,11 +541,11 @@ static void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *
* and, if available, the slab name, return address, and stack trace from
* the allocation and last free path of that object.
*
* This function will splat if passed a pointer to a non-slab object.
* If you are not sure what type of object you have, you should instead
* use mem_dump_obj().
* Return: %true if the pointer is to a not-yet-freed object from
* kmalloc() or kmem_cache_alloc(), either %true or %false if the pointer
* is to an already-freed object, and %false otherwise.
*/
void kmem_dump_obj(void *object)
bool kmem_dump_obj(void *object)
{
char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc";
int i;
@ -573,13 +553,13 @@ void kmem_dump_obj(void *object)
unsigned long ptroffset;
struct kmem_obj_info kp = { };
if (WARN_ON_ONCE(!virt_addr_valid(object)))
return;
/* Some arches consider ZERO_SIZE_PTR to be a valid address. */
if (object < (void *)PAGE_SIZE || !virt_addr_valid(object))
return false;
slab = virt_to_slab(object);
if (WARN_ON_ONCE(!slab)) {
pr_cont(" non-slab memory.\n");
return;
}
if (!slab)
return false;
kmem_obj_info(&kp, object, slab);
if (kp.kp_slab_cache)
pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name);
@ -616,6 +596,7 @@ void kmem_dump_obj(void *object)
pr_info(" %pS\n", kp.kp_free_stack[i]);
}
return true;
}
EXPORT_SYMBOL_GPL(kmem_dump_obj);
#endif
@ -1341,6 +1322,13 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
/* If the object still fits, repoison it precisely. */
if (ks >= new_size) {
/* Zero out spare memory. */
if (want_init_on_alloc(flags)) {
kasan_disable_current();
memset((void *)p + new_size, 0, ks - new_size);
kasan_enable_current();
}
p = kasan_krealloc((void *)p, new_size, flags);
return (void *)p;
}

View file

@ -129,7 +129,7 @@ static inline int sparse_early_nid(struct mem_section *section)
static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
unsigned long *end_pfn)
{
unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
unsigned long max_sparsemem_pfn = (PHYSMEM_END + 1) >> PAGE_SHIFT;
/*
* Sanity checks - do not allow an architecture to pass


@ -2006,7 +2006,7 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type)
mmap_read_lock(mm);
for_each_vma(vmi, vma) {
if (vma->anon_vma) {
if (vma->anon_vma && !is_vm_hugetlb_page(vma)) {
ret = unuse_vma(vma, type);
if (ret)
break;


@ -174,7 +174,7 @@ static void truncate_cleanup_folio(struct folio *folio)
if (folio_mapped(folio))
unmap_mapping_folio(folio);
if (folio_has_private(folio))
if (folio_needs_release(folio))
folio_invalidate(folio, 0, folio_size(folio));
/*
@ -235,7 +235,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
*/
folio_zero_range(folio, offset, length);
if (folio_has_private(folio))
if (folio_needs_release(folio))
folio_invalidate(folio, offset, length);
if (!folio_test_large(folio))
return true;


@ -399,7 +399,7 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
if (gap + pad > gap)
gap += pad;
if (gap < MIN_GAP)
if (gap < MIN_GAP && MIN_GAP < MAX_GAP)
gap = MIN_GAP;
else if (gap > MAX_GAP)
gap = MAX_GAP;
@ -1119,10 +1119,8 @@ void mem_dump_obj(void *object)
{
const char *type;
if (kmem_valid_obj(object)) {
kmem_dump_obj(object);
if (kmem_dump_obj(object))
return;
}
if (vmalloc_dump_obj(object))
return;
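
For illustration only: with kmem_dump_obj() now returning whether it recognized the pointer, mem_dump_obj() above can simply try each dumper in turn and fall through to the next one. A minimal C sketch of that try-each-handler shape is below; the handlers are invented stand-ins, not kernel functions.

#include <stdbool.h>
#include <stdio.h>

static bool dump_as_slab(const void *p)
{
    (void)p;
    return false;               /* pretend this is not a slab object */
}

static bool dump_as_vmalloc(const void *p)
{
    printf("%p: vmalloc-style object (pretend)\n", p);
    return true;
}

static void dump_obj(const void *p)
{
    if (dump_as_slab(p))
        return;
    if (dump_as_vmalloc(p))
        return;
    printf("%p: not a recognized object\n", p);
}

int main(void)
{
    int x;

    dump_obj(&x);
    return 0;
}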


@ -2992,15 +2992,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
page = alloc_pages(alloc_gfp, order);
else
page = alloc_pages_node(nid, alloc_gfp, order);
if (unlikely(!page)) {
if (!nofail)
break;
/* fall back to the zero order allocations */
alloc_gfp |= __GFP_NOFAIL;
order = 0;
continue;
}
if (unlikely(!page))
break;
/*
* Higher order allocations must be able to be treated as


@ -5065,7 +5065,6 @@ retry:
/* retry folios that may have missed folio_rotate_reclaimable() */
list_move(&folio->lru, &clean);
sc->nr_scanned -= folio_nr_pages(folio);
}
spin_lock_irq(&lruvec->lru_lock);