Currently it's not possible to register kernel buffers with TEE which are allocated via vmalloc.
Use iov_iter and associated helper functions to manage the page registration for all type of memories.
Suggested-by: Christoph Hellwig hch@infradead.org Signed-off-by: Arnaud Pouliquen arnaud.pouliquen@foss.st.com --- Update from V3 to V4: - improve commit message, - use import_ubuf() instead of iov_iter_init(), - move shm_get_kernel_pages in register_shm_helper, - put back untagged_addr in register_shm_helper(), - move the comment related to pin pages from shm_get_kernel_pages() to register_shm_helper().
Update from V2 to V3: - break lines longer than 80 columns.
Update from V1 to V2: - replace ITER_SOURCE by ITER_DEST flag in tee_shm_register_user_buf(), - replace IS_ERR_OR NULL(shm) by IS_ERR(shm) in tee_shm_register_user_buf().
V1: The support of buffer registration allocated with vmalloc is no more available since c83900393aa1 ("tee: Remove vmalloc page support").
This patch is an alternative to a revert and resulted from a discussion with Christopher Hellwig [1].
This patch has been tested using xtest tool in optee qemu environment [2] and using the series related to the remoteproc tee that should be proposed soon [3].
References: [1] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb... [2] https://optee.readthedocs.io/en/latest/building/devices/qemu.html#build-inst... [3] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb... --- drivers/tee/tee_shm.c | 83 ++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 37 deletions(-)
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 673cf0359494..ac73e8143233 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -22,23 +22,12 @@ static void shm_put_kernel_pages(struct page **pages, size_t page_count) put_page(pages[n]); }
-static int shm_get_kernel_pages(unsigned long start, size_t page_count, - struct page **pages) +static void shm_get_kernel_pages(struct page **pages, size_t page_count) { - struct page *page; size_t n;
- if (WARN_ON_ONCE(is_vmalloc_addr((void *)start) || - is_kmap_addr((void *)start))) - return -EINVAL; - - page = virt_to_page((void *)start); - for (n = 0; n < page_count; n++) { - pages[n] = page + n; + for (n = 0; n < page_count; n++) get_page(pages[n]); - } - - return page_count; }
static void release_registered_pages(struct tee_shm *shm) @@ -214,13 +203,14 @@ struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size) EXPORT_SYMBOL_GPL(tee_shm_alloc_priv_buf);
static struct tee_shm * -register_shm_helper(struct tee_context *ctx, unsigned long addr, - size_t length, u32 flags, int id) +register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags, + int id) { struct tee_device *teedev = ctx->teedev; struct tee_shm *shm; - unsigned long start; - size_t num_pages; + unsigned long start, addr; + size_t num_pages, off; + ssize_t len; void *ret; int rc;
@@ -245,31 +235,38 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr, shm->flags = flags; shm->ctx = ctx; shm->id = id; - addr = untagged_addr(addr); + addr = untagged_addr((unsigned long)iter_iov_addr(iter)); start = rounddown(addr, PAGE_SIZE); - shm->offset = addr - start; - shm->size = length; - num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE; + num_pages = iov_iter_npages(iter, INT_MAX); + if (!num_pages) { + ret = ERR_PTR(-ENOMEM); + goto err_ctx_put; + } + shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL); if (!shm->pages) { ret = ERR_PTR(-ENOMEM); goto err_free_shm; }
- if (flags & TEE_SHM_USER_MAPPED) - rc = pin_user_pages_fast(start, num_pages, FOLL_WRITE, - shm->pages); - else - rc = shm_get_kernel_pages(start, num_pages, shm->pages); - if (rc > 0) - shm->num_pages = rc; - if (rc != num_pages) { - if (rc >= 0) - rc = -ENOMEM; - ret = ERR_PTR(rc); - goto err_put_shm_pages; + len = iov_iter_extract_pages(iter, &shm->pages, LONG_MAX, num_pages, 0, + &off); + if (unlikely(len <= 0)) { + ret = len ? ERR_PTR(len) : ERR_PTR(-ENOMEM); + goto err_free_shm_pages; }
+ /* + * iov_iter_extract_kvec_pages does not get reference on the pages, + * get a pin on them. + */ + if (iov_iter_is_kvec(iter)) + shm_get_kernel_pages(shm->pages, num_pages); + + shm->offset = off; + shm->size = len; + shm->num_pages = num_pages; + rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages, shm->num_pages, start); if (rc) { @@ -279,10 +276,11 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
return shm; err_put_shm_pages: - if (flags & TEE_SHM_USER_MAPPED) + if (!iov_iter_is_kvec(iter)) unpin_user_pages(shm->pages, shm->num_pages); else shm_put_kernel_pages(shm->pages, shm->num_pages); +err_free_shm_pages: kfree(shm->pages); err_free_shm: kfree(shm); @@ -307,8 +305,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx, u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC; struct tee_device *teedev = ctx->teedev; struct tee_shm *shm; + struct iov_iter iter; void *ret; - int id; + int id, err;
if (!access_ok((void __user *)addr, length)) return ERR_PTR(-EFAULT); @@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx, if (id < 0) return ERR_PTR(id);
- shm = register_shm_helper(ctx, addr, length, flags, id); + err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter); + if (err) + return ERR_PTR(err); + + shm = register_shm_helper(ctx, &iter, flags, id); if (IS_ERR(shm)) { mutex_lock(&teedev->mutex); idr_remove(&teedev->idr, id); @@ -352,8 +355,14 @@ struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx, void *addr, size_t length) { u32 flags = TEE_SHM_DYNAMIC; + struct kvec kvec; + struct iov_iter iter; + + kvec.iov_base = addr; + kvec.iov_len = length; + iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, length);
- return register_shm_helper(ctx, (unsigned long)addr, length, flags, -1); + return register_shm_helper(ctx, &iter, flags, -1); } EXPORT_SYMBOL_GPL(tee_shm_register_kernel_buf);
On Wed, 29 Nov 2023 at 22:15, Arnaud Pouliquen arnaud.pouliquen@foss.st.com wrote:
Currently it's not possible to register kernel buffers with TEE which are allocated via vmalloc.
Use iov_iter and associated helper functions to manage the page registration for all type of memories.
Suggested-by: Christoph Hellwig hch@infradead.org Signed-off-by: Arnaud Pouliquen arnaud.pouliquen@foss.st.com
Update from V3 to V4:
- improve commit message,
- use import_ubuf() instead of iov_iter_init(),
- move shm_get_kernel_pages in register_shm_helper,
- put back untagged_addr in register_shm_helper(),
- move the comment related to pin pages from shm_get_kernel_pages() to register_shm_helper().
Update from V2 to V3:
- break lines longer than 80 columns.
Update from V1 to V2:
- replace ITER_SOURCE by ITER_DEST flag in tee_shm_register_user_buf(),
- replace IS_ERR_OR NULL(shm) by IS_ERR(shm) in tee_shm_register_user_buf().
V1: The support of buffer registration allocated with vmalloc is no more available since c83900393aa1 ("tee: Remove vmalloc page support").
This patch is an alternative to a revert and resulted from a discussion with Christopher Hellwig [1].
This patch has been tested using xtest tool in optee qemu environment [2] and using the series related to the remoteproc tee that should be proposed soon [3].
References: [1] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb... [2] https://optee.readthedocs.io/en/latest/building/devices/qemu.html#build-inst... [3] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb...
drivers/tee/tee_shm.c | 83 ++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 37 deletions(-)
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 673cf0359494..ac73e8143233 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -22,23 +22,12 @@ static void shm_put_kernel_pages(struct page **pages, size_t page_count) put_page(pages[n]); }
-static int shm_get_kernel_pages(unsigned long start, size_t page_count,
struct page **pages)
+static void shm_get_kernel_pages(struct page **pages, size_t page_count) {
struct page *page; size_t n;
if (WARN_ON_ONCE(is_vmalloc_addr((void *)start) ||
is_kmap_addr((void *)start)))
return -EINVAL;
page = virt_to_page((void *)start);
for (n = 0; n < page_count; n++) {
pages[n] = page + n;
for (n = 0; n < page_count; n++) get_page(pages[n]);
}
return page_count;
}
static void release_registered_pages(struct tee_shm *shm) @@ -214,13 +203,14 @@ struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size) EXPORT_SYMBOL_GPL(tee_shm_alloc_priv_buf);
static struct tee_shm * -register_shm_helper(struct tee_context *ctx, unsigned long addr,
size_t length, u32 flags, int id)
+register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags,
int id)
{ struct tee_device *teedev = ctx->teedev; struct tee_shm *shm;
unsigned long start;
size_t num_pages;
unsigned long start, addr;
size_t num_pages, off;
ssize_t len; void *ret; int rc;
@@ -245,31 +235,38 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr, shm->flags = flags; shm->ctx = ctx; shm->id = id;
addr = untagged_addr(addr);
addr = untagged_addr((unsigned long)iter_iov_addr(iter)); start = rounddown(addr, PAGE_SIZE);
shm->offset = addr - start;
shm->size = length;
num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE;
num_pages = iov_iter_npages(iter, INT_MAX);
if (!num_pages) {
ret = ERR_PTR(-ENOMEM);
goto err_ctx_put;
}
shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL); if (!shm->pages) { ret = ERR_PTR(-ENOMEM); goto err_free_shm; }
if (flags & TEE_SHM_USER_MAPPED)
rc = pin_user_pages_fast(start, num_pages, FOLL_WRITE,
shm->pages);
else
rc = shm_get_kernel_pages(start, num_pages, shm->pages);
if (rc > 0)
shm->num_pages = rc;
if (rc != num_pages) {
if (rc >= 0)
rc = -ENOMEM;
ret = ERR_PTR(rc);
goto err_put_shm_pages;
len = iov_iter_extract_pages(iter, &shm->pages, LONG_MAX, num_pages, 0,
&off);
if (unlikely(len <= 0)) {
ret = len ? ERR_PTR(len) : ERR_PTR(-ENOMEM);
goto err_free_shm_pages; }
/*
* iov_iter_extract_kvec_pages does not get reference on the pages,
* get a pin on them.
I think you meant: "get a reference on them". But I don't see the value of this comment since iov_iter_extract_kvec_pages() already has been commented properly as follows:
/* * Extract a list of virtually contiguous pages from an ITER_KVEC iterator. * This does not get references on the pages, nor does it get a pin on them. */
*/
if (iov_iter_is_kvec(iter))
shm_get_kernel_pages(shm->pages, num_pages);
shm->offset = off;
shm->size = len;
shm->num_pages = num_pages;
rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages, shm->num_pages, start); if (rc) {
@@ -279,10 +276,11 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
return shm;
err_put_shm_pages:
if (flags & TEE_SHM_USER_MAPPED)
if (!iov_iter_is_kvec(iter)) unpin_user_pages(shm->pages, shm->num_pages); else shm_put_kernel_pages(shm->pages, shm->num_pages);
+err_free_shm_pages: kfree(shm->pages); err_free_shm: kfree(shm); @@ -307,8 +305,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx, u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC; struct tee_device *teedev = ctx->teedev; struct tee_shm *shm;
struct iov_iter iter; void *ret;
int id;
int id, err; if (!access_ok((void __user *)addr, length)) return ERR_PTR(-EFAULT);
@@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx, if (id < 0) return ERR_PTR(id);
shm = register_shm_helper(ctx, addr, length, flags, id);
err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
As I mentioned in a previous review, import_ubuf() already does the access_ok() check, so we don't need the extra access_ok() check above. Also, you should move import_ubuf() to be the first invocation within this API.
-Sumit
if (err)
return ERR_PTR(err);
shm = register_shm_helper(ctx, &iter, flags, id); if (IS_ERR(shm)) { mutex_lock(&teedev->mutex); idr_remove(&teedev->idr, id);
@@ -352,8 +355,14 @@ struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx, void *addr, size_t length) { u32 flags = TEE_SHM_DYNAMIC;
struct kvec kvec;
struct iov_iter iter;
kvec.iov_base = addr;
kvec.iov_len = length;
iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, length);
return register_shm_helper(ctx, (unsigned long)addr, length, flags, -1);
return register_shm_helper(ctx, &iter, flags, -1);
} EXPORT_SYMBOL_GPL(tee_shm_register_kernel_buf);
-- 2.25.1
On 11/30/23 08:54, Sumit Garg wrote:
On Wed, 29 Nov 2023 at 22:15, Arnaud Pouliquen arnaud.pouliquen@foss.st.com wrote:
Currently it's not possible to register kernel buffers with TEE which are allocated via vmalloc.
Use iov_iter and associated helper functions to manage the page registration for all type of memories.
Suggested-by: Christoph Hellwig hch@infradead.org Signed-off-by: Arnaud Pouliquen arnaud.pouliquen@foss.st.com
Update from V3 to V4:
- improve commit message,
- use import_ubuf() instead of iov_iter_init(),
- move shm_get_kernel_pages in register_shm_helper,
- put back untagged_addr in register_shm_helper(),
- move the comment related to pin pages from shm_get_kernel_pages() to register_shm_helper().
Update from V2 to V3:
- break lines longer than 80 columns.
Update from V1 to V2:
- replace ITER_SOURCE by ITER_DEST flag in tee_shm_register_user_buf(),
- replace IS_ERR_OR NULL(shm) by IS_ERR(shm) in tee_shm_register_user_buf().
V1: The support of buffer registration allocated with vmalloc is no more available since c83900393aa1 ("tee: Remove vmalloc page support").
This patch is an alternative to a revert and resulted from a discussion with Christopher Hellwig [1].
This patch has been tested using xtest tool in optee qemu environment [2] and using the series related to the remoteproc tee that should be proposed soon [3].
References: [1] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb... [2] https://optee.readthedocs.io/en/latest/building/devices/qemu.html#build-inst... [3] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb...
drivers/tee/tee_shm.c | 83 ++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 37 deletions(-)
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 673cf0359494..ac73e8143233 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -22,23 +22,12 @@ static void shm_put_kernel_pages(struct page **pages, size_t page_count) put_page(pages[n]); }
-static int shm_get_kernel_pages(unsigned long start, size_t page_count,
struct page **pages)
+static void shm_get_kernel_pages(struct page **pages, size_t page_count) {
struct page *page; size_t n;
if (WARN_ON_ONCE(is_vmalloc_addr((void *)start) ||
is_kmap_addr((void *)start)))
return -EINVAL;
page = virt_to_page((void *)start);
for (n = 0; n < page_count; n++) {
pages[n] = page + n;
for (n = 0; n < page_count; n++) get_page(pages[n]);
}
return page_count;
}
static void release_registered_pages(struct tee_shm *shm) @@ -214,13 +203,14 @@ struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size) EXPORT_SYMBOL_GPL(tee_shm_alloc_priv_buf);
static struct tee_shm * -register_shm_helper(struct tee_context *ctx, unsigned long addr,
size_t length, u32 flags, int id)
+register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags,
int id)
{ struct tee_device *teedev = ctx->teedev; struct tee_shm *shm;
unsigned long start;
size_t num_pages;
unsigned long start, addr;
size_t num_pages, off;
ssize_t len; void *ret; int rc;
@@ -245,31 +235,38 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr, shm->flags = flags; shm->ctx = ctx; shm->id = id;
addr = untagged_addr(addr);
addr = untagged_addr((unsigned long)iter_iov_addr(iter)); start = rounddown(addr, PAGE_SIZE);
shm->offset = addr - start;
shm->size = length;
num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE;
num_pages = iov_iter_npages(iter, INT_MAX);
if (!num_pages) {
ret = ERR_PTR(-ENOMEM);
goto err_ctx_put;
}
shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL); if (!shm->pages) { ret = ERR_PTR(-ENOMEM); goto err_free_shm; }
if (flags & TEE_SHM_USER_MAPPED)
rc = pin_user_pages_fast(start, num_pages, FOLL_WRITE,
shm->pages);
else
rc = shm_get_kernel_pages(start, num_pages, shm->pages);
if (rc > 0)
shm->num_pages = rc;
if (rc != num_pages) {
if (rc >= 0)
rc = -ENOMEM;
ret = ERR_PTR(rc);
goto err_put_shm_pages;
len = iov_iter_extract_pages(iter, &shm->pages, LONG_MAX, num_pages, 0,
&off);
if (unlikely(len <= 0)) {
ret = len ? ERR_PTR(len) : ERR_PTR(-ENOMEM);
goto err_free_shm_pages; }
/*
* iov_iter_extract_kvec_pages does not get reference on the pages,
* get a pin on them.
I think you meant: "get a reference on them". But I don't see the value of this comment since iov_iter_extract_kvec_pages() already has been commented properly as follows:
/*
- Extract a list of virtually contiguous pages from an ITER_KVEC iterator.
- This does not get references on the pages, nor does it get a pin on them.
*/
I spent some time debugging this part. Since we use the same API for both user and kernel buffers, we wouldn't expect to have any specific actions to take. Therefore, I thought it would be helpful to add a comment explaining the reason for this specific code, rather than go deeper into iov_iter to understand it.
But if you don't see the value, I can remove the comment.
*/
if (iov_iter_is_kvec(iter))
shm_get_kernel_pages(shm->pages, num_pages);
shm->offset = off;
shm->size = len;
shm->num_pages = num_pages;
rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages, shm->num_pages, start); if (rc) {
@@ -279,10 +276,11 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
return shm;
err_put_shm_pages:
if (flags & TEE_SHM_USER_MAPPED)
if (!iov_iter_is_kvec(iter)) unpin_user_pages(shm->pages, shm->num_pages); else shm_put_kernel_pages(shm->pages, shm->num_pages);
+err_free_shm_pages: kfree(shm->pages); err_free_shm: kfree(shm); @@ -307,8 +305,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx, u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC; struct tee_device *teedev = ctx->teedev; struct tee_shm *shm;
struct iov_iter iter; void *ret;
int id;
int id, err; if (!access_ok((void __user *)addr, length)) return ERR_PTR(-EFAULT);
@@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx, if (id < 0) return ERR_PTR(id);
shm = register_shm_helper(ctx, addr, length, flags, id);
err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
As I mentioned in a previous review, import_ubuf() already does the access_ok() check, so we don't need the extra access_ok() check above. Also, you should move import_ubuf() to be the first invocation within this API.
My apologies, I re-added import_ubuf() during testing to debug an issue and forgot to remove it afterwards.
Thanks and regards, Arnaud
-Sumit
if (err)
return ERR_PTR(err);
shm = register_shm_helper(ctx, &iter, flags, id); if (IS_ERR(shm)) { mutex_lock(&teedev->mutex); idr_remove(&teedev->idr, id);
@@ -352,8 +355,14 @@ struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx, void *addr, size_t length) { u32 flags = TEE_SHM_DYNAMIC;
struct kvec kvec;
struct iov_iter iter;
kvec.iov_base = addr;
kvec.iov_len = length;
iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, length);
return register_shm_helper(ctx, (unsigned long)addr, length, flags, -1);
return register_shm_helper(ctx, &iter, flags, -1);
} EXPORT_SYMBOL_GPL(tee_shm_register_kernel_buf);
-- 2.25.1
On Thu, 30 Nov 2023 at 14:38, Arnaud POULIQUEN arnaud.pouliquen@foss.st.com wrote:
On 11/30/23 08:54, Sumit Garg wrote:
On Wed, 29 Nov 2023 at 22:15, Arnaud Pouliquen arnaud.pouliquen@foss.st.com wrote:
Currently it's not possible to register kernel buffers with TEE which are allocated via vmalloc.
Use iov_iter and associated helper functions to manage the page registration for all type of memories.
Suggested-by: Christoph Hellwig hch@infradead.org Signed-off-by: Arnaud Pouliquen arnaud.pouliquen@foss.st.com
Update from V3 to V4:
- improve commit message,
- use import_ubuf() instead of iov_iter_init(),
- move shm_get_kernel_pages in register_shm_helper,
- put back untagged_addr in register_shm_helper(),
- move the comment related to pin pages from shm_get_kernel_pages() to register_shm_helper().
Update from V2 to V3:
- break lines longer than 80 columns.
Update from V1 to V2:
- replace ITER_SOURCE by ITER_DEST flag in tee_shm_register_user_buf(),
- replace IS_ERR_OR NULL(shm) by IS_ERR(shm) in tee_shm_register_user_buf().
V1: The support of buffer registration allocated with vmalloc is no more available since c83900393aa1 ("tee: Remove vmalloc page support").
This patch is an alternative to a revert and resulted from a discussion with Christopher Hellwig [1].
This patch has been tested using xtest tool in optee qemu environment [2] and using the series related to the remoteproc tee that should be proposed soon [3].
References: [1] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb... [2] https://optee.readthedocs.io/en/latest/building/devices/qemu.html#build-inst... [3] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb...
drivers/tee/tee_shm.c | 83 ++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 37 deletions(-)
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 673cf0359494..ac73e8143233 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -22,23 +22,12 @@ static void shm_put_kernel_pages(struct page **pages, size_t page_count) put_page(pages[n]); }
-static int shm_get_kernel_pages(unsigned long start, size_t page_count,
struct page **pages)
+static void shm_get_kernel_pages(struct page **pages, size_t page_count) {
struct page *page; size_t n;
if (WARN_ON_ONCE(is_vmalloc_addr((void *)start) ||
is_kmap_addr((void *)start)))
return -EINVAL;
page = virt_to_page((void *)start);
for (n = 0; n < page_count; n++) {
pages[n] = page + n;
for (n = 0; n < page_count; n++) get_page(pages[n]);
}
return page_count;
}
static void release_registered_pages(struct tee_shm *shm) @@ -214,13 +203,14 @@ struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size) EXPORT_SYMBOL_GPL(tee_shm_alloc_priv_buf);
static struct tee_shm * -register_shm_helper(struct tee_context *ctx, unsigned long addr,
size_t length, u32 flags, int id)
+register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags,
int id)
{ struct tee_device *teedev = ctx->teedev; struct tee_shm *shm;
unsigned long start;
size_t num_pages;
unsigned long start, addr;
size_t num_pages, off;
ssize_t len; void *ret; int rc;
@@ -245,31 +235,38 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr, shm->flags = flags; shm->ctx = ctx; shm->id = id;
addr = untagged_addr(addr);
addr = untagged_addr((unsigned long)iter_iov_addr(iter)); start = rounddown(addr, PAGE_SIZE);
shm->offset = addr - start;
shm->size = length;
num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE;
num_pages = iov_iter_npages(iter, INT_MAX);
if (!num_pages) {
ret = ERR_PTR(-ENOMEM);
goto err_ctx_put;
}
shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL); if (!shm->pages) { ret = ERR_PTR(-ENOMEM); goto err_free_shm; }
if (flags & TEE_SHM_USER_MAPPED)
rc = pin_user_pages_fast(start, num_pages, FOLL_WRITE,
shm->pages);
else
rc = shm_get_kernel_pages(start, num_pages, shm->pages);
if (rc > 0)
shm->num_pages = rc;
if (rc != num_pages) {
if (rc >= 0)
rc = -ENOMEM;
ret = ERR_PTR(rc);
goto err_put_shm_pages;
len = iov_iter_extract_pages(iter, &shm->pages, LONG_MAX, num_pages, 0,
&off);
if (unlikely(len <= 0)) {
ret = len ? ERR_PTR(len) : ERR_PTR(-ENOMEM);
goto err_free_shm_pages; }
/*
* iov_iter_extract_kvec_pages does not get reference on the pages,
* get a pin on them.
I think you meant: "get a reference on them". But I don't see the value of this comment since iov_iter_extract_kvec_pages() already has been commented properly as follows:
/*
- Extract a list of virtually contiguous pages from an ITER_KVEC iterator.
- This does not get references on the pages, nor does it get a pin on them.
*/
I spent some time debugging this part. Since we use the same API for both user and kernel buffers, we wouldn't expect to have any specific actions to take. Therefore, I thought it would be helpful to add a comment explaining the reason for this specific code, rather than go deeper into iov_iter to understand it.
Fair enough, let's keep it with s/pin/reference/.
But if you don't see the value, I can remove the comment.
*/
if (iov_iter_is_kvec(iter))
shm_get_kernel_pages(shm->pages, num_pages);
shm->offset = off;
shm->size = len;
shm->num_pages = num_pages;
rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages, shm->num_pages, start); if (rc) {
@@ -279,10 +276,11 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
return shm;
err_put_shm_pages:
if (flags & TEE_SHM_USER_MAPPED)
if (!iov_iter_is_kvec(iter)) unpin_user_pages(shm->pages, shm->num_pages); else shm_put_kernel_pages(shm->pages, shm->num_pages);
+err_free_shm_pages: kfree(shm->pages); err_free_shm: kfree(shm); @@ -307,8 +305,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx, u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC; struct tee_device *teedev = ctx->teedev; struct tee_shm *shm;
struct iov_iter iter; void *ret;
int id;
int id, err; if (!access_ok((void __user *)addr, length)) return ERR_PTR(-EFAULT);
@@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx, if (id < 0) return ERR_PTR(id);
shm = register_shm_helper(ctx, addr, length, flags, id);
err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
As I mentioned in a previous review, import_ubuf() already does the access_ok() check, so we don't need the extra access_ok() check above. Also, you should move import_ubuf() to be the first invocation within this API.
My apologies, I re-added import_ubuf() during testing to debug an issue and
I suppose you intended to mention access_ok() here, BTW, no worries :).
-Sumit
forgot to remove it afterwards.
Thanks and regards, Arnaud
-Sumit
if (err)
return ERR_PTR(err);
shm = register_shm_helper(ctx, &iter, flags, id); if (IS_ERR(shm)) { mutex_lock(&teedev->mutex); idr_remove(&teedev->idr, id);
@@ -352,8 +355,14 @@ struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx, void *addr, size_t length) { u32 flags = TEE_SHM_DYNAMIC;
struct kvec kvec;
struct iov_iter iter;
kvec.iov_base = addr;
kvec.iov_len = length;
iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, length);
return register_shm_helper(ctx, (unsigned long)addr, length, flags, -1);
return register_shm_helper(ctx, &iter, flags, -1);
} EXPORT_SYMBOL_GPL(tee_shm_register_kernel_buf);
-- 2.25.1
On 11/30/23 13:00, Sumit Garg wrote:
On Thu, 30 Nov 2023 at 14:38, Arnaud POULIQUEN arnaud.pouliquen@foss.st.com wrote:
On 11/30/23 08:54, Sumit Garg wrote:
On Wed, 29 Nov 2023 at 22:15, Arnaud Pouliquen arnaud.pouliquen@foss.st.com wrote:
Currently it's not possible to register kernel buffers with TEE which are allocated via vmalloc.
Use iov_iter and associated helper functions to manage the page registration for all type of memories.
Suggested-by: Christoph Hellwig hch@infradead.org Signed-off-by: Arnaud Pouliquen arnaud.pouliquen@foss.st.com
Update from V3 to V4:
- improve commit message,
- use import_ubuf() instead of iov_iter_init(),
- move shm_get_kernel_pages in register_shm_helper,
- put back untagged_addr in register_shm_helper(),
- move the comment related to pin pages from shm_get_kernel_pages() to register_shm_helper().
Update from V2 to V3:
- break lines longer than 80 columns.
Update from V1 to V2:
- replace ITER_SOURCE by ITER_DEST flag in tee_shm_register_user_buf(),
- replace IS_ERR_OR NULL(shm) by IS_ERR(shm) in tee_shm_register_user_buf().
V1: The support of buffer registration allocated with vmalloc is no more available since c83900393aa1 ("tee: Remove vmalloc page support").
This patch is an alternative to a revert and resulted from a discussion with Christopher Hellwig [1].
This patch has been tested using xtest tool in optee qemu environment [2] and using the series related to the remoteproc tee that should be proposed soon [3].
References: [1] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb... [2] https://optee.readthedocs.io/en/latest/building/devices/qemu.html#build-inst... [3] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb...
drivers/tee/tee_shm.c | 83 ++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 37 deletions(-)
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 673cf0359494..ac73e8143233 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -22,23 +22,12 @@ static void shm_put_kernel_pages(struct page **pages, size_t page_count) put_page(pages[n]); }
-static int shm_get_kernel_pages(unsigned long start, size_t page_count,
struct page **pages)
+static void shm_get_kernel_pages(struct page **pages, size_t page_count) {
struct page *page; size_t n;
if (WARN_ON_ONCE(is_vmalloc_addr((void *)start) ||
is_kmap_addr((void *)start)))
return -EINVAL;
page = virt_to_page((void *)start);
for (n = 0; n < page_count; n++) {
pages[n] = page + n;
for (n = 0; n < page_count; n++) get_page(pages[n]);
}
return page_count;
}
static void release_registered_pages(struct tee_shm *shm) @@ -214,13 +203,14 @@ struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size) EXPORT_SYMBOL_GPL(tee_shm_alloc_priv_buf);
static struct tee_shm * -register_shm_helper(struct tee_context *ctx, unsigned long addr,
size_t length, u32 flags, int id)
+register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags,
int id)
{ struct tee_device *teedev = ctx->teedev; struct tee_shm *shm;
unsigned long start;
size_t num_pages;
unsigned long start, addr;
size_t num_pages, off;
ssize_t len; void *ret; int rc;
@@ -245,31 +235,38 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr, shm->flags = flags; shm->ctx = ctx; shm->id = id;
addr = untagged_addr(addr);
addr = untagged_addr((unsigned long)iter_iov_addr(iter)); start = rounddown(addr, PAGE_SIZE);
shm->offset = addr - start;
shm->size = length;
num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE;
num_pages = iov_iter_npages(iter, INT_MAX);
if (!num_pages) {
ret = ERR_PTR(-ENOMEM);
goto err_ctx_put;
}
shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL); if (!shm->pages) { ret = ERR_PTR(-ENOMEM); goto err_free_shm; }
if (flags & TEE_SHM_USER_MAPPED)
rc = pin_user_pages_fast(start, num_pages, FOLL_WRITE,
shm->pages);
else
rc = shm_get_kernel_pages(start, num_pages, shm->pages);
if (rc > 0)
shm->num_pages = rc;
if (rc != num_pages) {
if (rc >= 0)
rc = -ENOMEM;
ret = ERR_PTR(rc);
goto err_put_shm_pages;
len = iov_iter_extract_pages(iter, &shm->pages, LONG_MAX, num_pages, 0,
&off);
if (unlikely(len <= 0)) {
ret = len ? ERR_PTR(len) : ERR_PTR(-ENOMEM);
goto err_free_shm_pages; }
/*
* iov_iter_extract_kvec_pages does not get reference on the pages,
* get a pin on them.
I think you meant: "get a reference on them". But I don't see the value of this comment since iov_iter_extract_kvec_pages() already has been commented properly as follows:
/*
- Extract a list of virtually contiguous pages from an ITER_KVEC iterator.
- This does not get references on the pages, nor does it get a pin on them.
*/
I spent some time debugging this part. Since we use the same API for both user and kernel buffers, we wouldn't expect to have any specific actions to take. Therefore, I thought it would be helpful to add a comment explaining the reason for this specific code, rather than go deeper into iov_iter to understand it.
Fair enough, let's keep it with s/pin/reference/.
But if you don't see the value, I can remove the comment.
*/
if (iov_iter_is_kvec(iter))
shm_get_kernel_pages(shm->pages, num_pages);
shm->offset = off;
shm->size = len;
shm->num_pages = num_pages;
rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages, shm->num_pages, start); if (rc) {
@@ -279,10 +276,11 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
return shm;
err_put_shm_pages:
if (flags & TEE_SHM_USER_MAPPED)
if (!iov_iter_is_kvec(iter)) unpin_user_pages(shm->pages, shm->num_pages); else shm_put_kernel_pages(shm->pages, shm->num_pages);
+err_free_shm_pages: kfree(shm->pages); err_free_shm: kfree(shm); @@ -307,8 +305,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx, u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC; struct tee_device *teedev = ctx->teedev; struct tee_shm *shm;
struct iov_iter iter; void *ret;
int id;
int id, err; if (!access_ok((void __user *)addr, length)) return ERR_PTR(-EFAULT);
@@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx, if (id < 0) return ERR_PTR(id);
shm = register_shm_helper(ctx, addr, length, flags, id);
err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
As I mentioned in a previous review, import_ubuf() already does the access_ok() check, so we don't need the extra access_ok() check above. Also, you should move import_ubuf() to be the first invocation within this API.
My apologies, I re-added import_ubuf() during testing to debug an issue and
I suppose you intended to mention access_ok() here, BTW, no worries :).
Re-running xtest after removing the access_ok() I have a crash in regression_5006.3 Allocate_out_of_memory
[ 89.258100] ------------[ cut here ]------------ [ 89.258377] WARNING: CPU: 1 PID: 134 at mm/page_alloc.c:4402 __alloc_pages+0x674/0xd14 [ 89.258988] Modules linked in: [ 89.259554] CPU: 1 PID: 134 Comm: xtest Not tainted 6.6.0-g1ebcc18a80d7-dirty #69 [ 89.259763] Hardware name: linux,dummy-virt (DT) [ 89.259977] pstate: 21400005 (nzCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) [ 89.260143] pc : __alloc_pages+0x674/0xd14 [ 89.260252] lr : alloc_pages+0xac/0x160 [ 89.260364] sp : ffff8000803f3a30 [ 89.260437] x29: ffff8000803f3a30 x28: ffff8000803f3d70 x27: 0000000000000000 [ 89.260705] x26: 000000000000000b x25: 0000000000000000 x24: ffff000000178000 [ 89.260847] x23: ffff00003f52b720 x22: ffff000001442720 x21: 0000000000000000 [ 89.261022] x20: 000000000000000b x19: 0000000000000000 x18: ffff8000803f3b78 [ 89.261162] x17: 0000000000000000 x16: ffffa7a428b67660 x15: 0000ffff63ffffff [ 89.261348] x14: 0000000000000008 x13: 1fffe00000a4c421 x12: ffff8000803f3b78 [ 89.261528] x11: ffff000005262100 x10: ffff000005262108 x9 : ffff00000526210c [ 89.261739] x8 : 0000aaab0a97c000 x7 : 0000aaab0a90e000 x6 : ffff000005262150 [ 89.261920] x5 : 0000000000000000 x4 : ffff000000976740 x3 : 0000000000000000 [ 89.262098] x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffffa7a429daf000 [ 89.262340] Call trace: [ 89.262921] __alloc_pages+0x674/0xd14 [ 89.263262] alloc_pages+0xac/0x160 [ 89.263373] alloc_pages_exact+0x48/0x94 [ 89.263464] optee_shm_register+0xa8/0x1f4 [ 89.263591] register_shm_helper+0x1bc/0x28c [ 89.263687] tee_shm_register_user_buf+0xb8/0x128 [ 89.263816] tee_ioctl+0xbc/0xfa0 [ 89.263915] __arm64_sys_ioctl+0xa8/0xec [ 89.264053] invoke_syscall+0x48/0x114 [ 89.264173] el0_svc_common.constprop.0+0x40/0xe8 [ 89.264321] do_el0_svc+0x20/0x2c [ 89.264488] el0_svc+0x40/0xf4 [ 89.264578] el0t_64_sync_handler+0x13c/0x158 [ 89.264714] el0t_64_sync+0x190/0x194 [ 89.265003] ---[ end trace 0000000000000000 ]---
The issue is that, in import_ubuf(), it updates the length [1], making the
access_ok() succeed and leading to an issue later in the page allocation process.
To fix, I propose to add a test in tee_shm_register_user_buf() after calling
import_ubuf()
if (length != iter_iov_len(&iter)) return ERR_PTR(-EFAULT);
Would it be ok for you ? I'm afraid of side effects if I update import_ubuf()...
[1] https://elixir.bootlin.com/linux/latest/source/lib/iov_iter.c#L1553
Regards, Arnaud
-Sumit
forgot to remove it afterwards.
Thanks and regards, Arnaud
-Sumit
if (err)
return ERR_PTR(err);
shm = register_shm_helper(ctx, &iter, flags, id); if (IS_ERR(shm)) { mutex_lock(&teedev->mutex); idr_remove(&teedev->idr, id);
@@ -352,8 +355,14 @@ struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx, void *addr, size_t length) { u32 flags = TEE_SHM_DYNAMIC;
struct kvec kvec;
struct iov_iter iter;
kvec.iov_base = addr;
kvec.iov_len = length;
iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, length);
return register_shm_helper(ctx, (unsigned long)addr, length, flags, -1);
return register_shm_helper(ctx, &iter, flags, -1);
} EXPORT_SYMBOL_GPL(tee_shm_register_kernel_buf);
-- 2.25.1
+ Jens A., Al Viro
<snip>
*/
if (iov_iter_is_kvec(iter))
shm_get_kernel_pages(shm->pages, num_pages);
shm->offset = off;
shm->size = len;
shm->num_pages = num_pages;
rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages, shm->num_pages, start); if (rc) {
@@ -279,10 +276,11 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
return shm;
err_put_shm_pages:
if (flags & TEE_SHM_USER_MAPPED)
if (!iov_iter_is_kvec(iter)) unpin_user_pages(shm->pages, shm->num_pages); else shm_put_kernel_pages(shm->pages, shm->num_pages);
+err_free_shm_pages: kfree(shm->pages); err_free_shm: kfree(shm); @@ -307,8 +305,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx, u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC; struct tee_device *teedev = ctx->teedev; struct tee_shm *shm;
struct iov_iter iter; void *ret;
int id;
int id, err; if (!access_ok((void __user *)addr, length)) return ERR_PTR(-EFAULT);
@@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx, if (id < 0) return ERR_PTR(id);
shm = register_shm_helper(ctx, addr, length, flags, id);
err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
As I mentioned in a previous review, import_ubuf() already does the access_ok() check, so we don't need the extra access_ok() check above. Also, you should move import_ubuf() to be the first invocation within this API.
My apologies, I re-added import_ubuf() during testing to debug an issue and
I suppose you intended to mention access_ok() here, BTW, no worries :).
Re-running xtest after removing the access_ok() I have a crash in regression_5006.3 Allocate_out_of_memory
[ 89.258100] ------------[ cut here ]------------ [ 89.258377] WARNING: CPU: 1 PID: 134 at mm/page_alloc.c:4402 __alloc_pages+0x674/0xd14 [ 89.258988] Modules linked in: [ 89.259554] CPU: 1 PID: 134 Comm: xtest Not tainted 6.6.0-g1ebcc18a80d7-dirty #69 [ 89.259763] Hardware name: linux,dummy-virt (DT) [ 89.259977] pstate: 21400005 (nzCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) [ 89.260143] pc : __alloc_pages+0x674/0xd14 [ 89.260252] lr : alloc_pages+0xac/0x160 [ 89.260364] sp : ffff8000803f3a30 [ 89.260437] x29: ffff8000803f3a30 x28: ffff8000803f3d70 x27: 0000000000000000 [ 89.260705] x26: 000000000000000b x25: 0000000000000000 x24: ffff000000178000 [ 89.260847] x23: ffff00003f52b720 x22: ffff000001442720 x21: 0000000000000000 [ 89.261022] x20: 000000000000000b x19: 0000000000000000 x18: ffff8000803f3b78 [ 89.261162] x17: 0000000000000000 x16: ffffa7a428b67660 x15: 0000ffff63ffffff [ 89.261348] x14: 0000000000000008 x13: 1fffe00000a4c421 x12: ffff8000803f3b78 [ 89.261528] x11: ffff000005262100 x10: ffff000005262108 x9 : ffff00000526210c [ 89.261739] x8 : 0000aaab0a97c000 x7 : 0000aaab0a90e000 x6 : ffff000005262150 [ 89.261920] x5 : 0000000000000000 x4 : ffff000000976740 x3 : 0000000000000000 [ 89.262098] x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffffa7a429daf000 [ 89.262340] Call trace: [ 89.262921] __alloc_pages+0x674/0xd14 [ 89.263262] alloc_pages+0xac/0x160 [ 89.263373] alloc_pages_exact+0x48/0x94 [ 89.263464] optee_shm_register+0xa8/0x1f4 [ 89.263591] register_shm_helper+0x1bc/0x28c [ 89.263687] tee_shm_register_user_buf+0xb8/0x128 [ 89.263816] tee_ioctl+0xbc/0xfa0 [ 89.263915] __arm64_sys_ioctl+0xa8/0xec [ 89.264053] invoke_syscall+0x48/0x114 [ 89.264173] el0_svc_common.constprop.0+0x40/0xe8 [ 89.264321] do_el0_svc+0x20/0x2c [ 89.264488] el0_svc+0x40/0xf4 [ 89.264578] el0t_64_sync_handler+0x13c/0x158 [ 89.264714] el0t_64_sync+0x190/0x194 [ 89.265003] ---[ end trace 0000000000000000 ]---
The issue is that, in import_ubuf(), it updates the length [1], making the
access_ok() succeed and leading to an issue later in the page allocation process.
To fix, I propose to add a test in tee_shm_register_user_buf() after calling
import_ubuf()
if (length != iter_iov_len(&iter)) return ERR_PTR(-EFAULT);
Would it be ok for you ? I'm afraid of side effects if I update import_ubuf()...
IMO, access_ok() should be the first thing that import_ubuf() or import_single_range() should do, something as follows:
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8ff6824a1005..4aee0371824c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) { - if (len > MAX_RW_COUNT) - len = MAX_RW_COUNT; if (unlikely(!access_ok(buf, len))) return -EFAULT; + if (len > MAX_RW_COUNT) + len = MAX_RW_COUNT;
iov_iter_ubuf(i, rw, buf, len); return 0;
Jens A., Al Viro,
Was there any particular reason which I am unaware of to perform access_ok() check on modified input length?
-Sumit
[1] https://elixir.bootlin.com/linux/latest/source/lib/iov_iter.c#L1553
On 12/4/23 5:42 AM, Sumit Garg wrote:
IMO, access_ok() should be the first thing that import_ubuf() or import_single_range() should do, something as follows:
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8ff6824a1005..4aee0371824c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) {
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; if (unlikely(!access_ok(buf, len))) return -EFAULT;
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; iov_iter_ubuf(i, rw, buf, len); return 0;
Jens A., Al Viro,
Was there any particular reason which I am unaware of to perform access_ok() check on modified input length?
This change makes sense to me, and seems consistent with what is done elsewhere too.
On 12/4/23 9:36 AM, Jens Axboe wrote:
On 12/4/23 5:42 AM, Sumit Garg wrote:
IMO, access_ok() should be the first thing that import_ubuf() or import_single_range() should do, something as follows:
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8ff6824a1005..4aee0371824c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) {
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; if (unlikely(!access_ok(buf, len))) return -EFAULT;
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; iov_iter_ubuf(i, rw, buf, len); return 0;
Jens A., Al Viro,
Was there any particular reason which I am unaware of to perform access_ok() check on modified input length?
This change makes sense to me, and seems consistent with what is done elsewhere too.
For some reason I missed import_single_range(), which does it the same way as import_ubuf() currently does - cap the range before the access_ok() check. The vec variants sum as they go, but access_ok() before the range.
I think part of the issue here is that the single range imports return 0 for success and -ERROR otherwise. This means that the caller does not know if the full range was imported or not. OTOH, we always cap any data transfer at MAX_RW_COUNT, so may make more sense to fix up the caller here.
Hi,
On 12/4/23 17:40, Jens Axboe wrote:
On 12/4/23 9:36 AM, Jens Axboe wrote:
On 12/4/23 5:42 AM, Sumit Garg wrote:
IMO, access_ok() should be the first thing that import_ubuf() or import_single_range() should do, something as follows:
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8ff6824a1005..4aee0371824c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) {
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; if (unlikely(!access_ok(buf, len))) return -EFAULT;
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; iov_iter_ubuf(i, rw, buf, len); return 0;
Jens A., Al Viro,
Was there any particular reason which I am unaware of to perform access_ok() check on modified input length?
This change makes sense to me, and seems consistent with what is done elsewhere too.
For some reason I missed import_single_range(), which does it the same way as import_ubuf() currently does - cap the range before the access_ok() check. The vec variants sum as they go, but access_ok() before the range.
I think part of the issue here is that the single range imports return 0 for success and -ERROR otherwise. This means that the caller does not know if the full range was imported or not. OTOH, we always cap any data transfer at MAX_RW_COUNT, so may make more sense to fix up the caller here.
Should we limit to MAX_RW_COUNT or return an error? Seems to me that limiting could generate side effect later that could be not simple to debug.
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) {
if (len > MAX_RW_COUNT)
return -EFAULT; if (unlikely(!access_ok(buf, len))) return -EFAULT; iov_iter_ubuf(i, rw, buf, len); return 0;
or perhaps just remove the test as __access_ok() already tests that the size < TASK_SIZE
https://elixir.bootlin.com/linux/v6.7-rc4/source/include/asm-generic/access_...
Thanks, Arnaud
On 12/4/23 10:02 AM, Arnaud POULIQUEN wrote:
Hi,
On 12/4/23 17:40, Jens Axboe wrote:
On 12/4/23 9:36 AM, Jens Axboe wrote:
On 12/4/23 5:42 AM, Sumit Garg wrote:
IMO, access_ok() should be the first thing that import_ubuf() or import_single_range() should do, something as follows:
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8ff6824a1005..4aee0371824c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) {
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; if (unlikely(!access_ok(buf, len))) return -EFAULT;
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; iov_iter_ubuf(i, rw, buf, len); return 0;
Jens A., Al Viro,
Was there any particular reason which I am unaware of to perform access_ok() check on modified input length?
This change makes sense to me, and seems consistent with what is done elsewhere too.
For some reason I missed import_single_range(), which does it the same way as import_ubuf() currently does - cap the range before the access_ok() check. The vec variants sum as they go, but access_ok() before the range.
I think part of the issue here is that the single range imports return 0 for success and -ERROR otherwise. This means that the caller does not know if the full range was imported or not. OTOH, we always cap any data transfer at MAX_RW_COUNT, so may make more sense to fix up the caller here.
Should we limit to MAX_RW_COUNT or return an error? Seems to me that limiting could generate side effect later that could be not simple to debug.
We've traditionally just truncated the length, so principle of least surprise says we should continue doing that.
hi Jens Axboe, Al Viro,
On 12/4/23 18:13, Jens Axboe wrote:
On 12/4/23 10:02 AM, Arnaud POULIQUEN wrote:
Hi,
On 12/4/23 17:40, Jens Axboe wrote:
On 12/4/23 9:36 AM, Jens Axboe wrote:
On 12/4/23 5:42 AM, Sumit Garg wrote:
IMO, access_ok() should be the first thing that import_ubuf() or import_single_range() should do, something as follows:
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8ff6824a1005..4aee0371824c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) {
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; if (unlikely(!access_ok(buf, len))) return -EFAULT;
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; iov_iter_ubuf(i, rw, buf, len); return 0;
Jens A., Al Viro,
Was there any particular reason which I am unaware of to perform access_ok() check on modified input length?
This change makes sense to me, and seems consistent with what is done elsewhere too.
For some reason I missed import_single_range(), which does it the same way as import_ubuf() currently does - cap the range before the access_ok() check. The vec variants sum as they go, but access_ok() before the range.
I think part of the issue here is that the single range imports return 0 for success and -ERROR otherwise. This means that the caller does not know if the full range was imported or not. OTOH, we always cap any data transfer at MAX_RW_COUNT, so may make more sense to fix up the caller here.
Should we limit to MAX_RW_COUNT or return an error? Seems to me that limiting could generate side effect later that could be not simple to debug.
We've traditionally just truncated the length, so principle of least surprise says we should continue doing that.
As Jens Wiklander has proposed using iov_iter_ubuf() instead of import_ubuf(), should I propose a patch updating import_ubuf() and import_single_range()? Or would you prefer that we keep the functions unchanged for the time being?
Regards, Arnaud
On 12/5/23 9:55 AM, Arnaud POULIQUEN wrote:
hi Jens Axboe, Al Viro,
On 12/4/23 18:13, Jens Axboe wrote:
On 12/4/23 10:02 AM, Arnaud POULIQUEN wrote:
Hi,
On 12/4/23 17:40, Jens Axboe wrote:
On 12/4/23 9:36 AM, Jens Axboe wrote:
On 12/4/23 5:42 AM, Sumit Garg wrote:
IMO, access_ok() should be the first thing that import_ubuf() or import_single_range() should do, something as follows:
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8ff6824a1005..4aee0371824c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) {
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; if (unlikely(!access_ok(buf, len))) return -EFAULT;
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; iov_iter_ubuf(i, rw, buf, len); return 0;
Jens A., Al Viro,
Was there any particular reason which I am unaware of to perform access_ok() check on modified input length?
This change makes sense to me, and seems consistent with what is done elsewhere too.
For some reason I missed import_single_range(), which does it the same way as import_ubuf() currently does - cap the range before the access_ok() check. The vec variants sum as they go, but access_ok() before the range.
I think part of the issue here is that the single range imports return 0 for success and -ERROR otherwise. This means that the caller does not know if the full range was imported or not. OTOH, we always cap any data transfer at MAX_RW_COUNT, so may make more sense to fix up the caller here.
Should we limit to MAX_RW_COUNT or return an error? Seems to me that limiting could generate side effect later that could be not simple to debug.
We've traditionally just truncated the length, so principle of least surprise says we should continue doing that.
As Jens Wiklander has proposed using iov_iter_ubuf() instead of import_ubuf(), should I propose a patch updating import_ubuf() and import_single_range()? Or would you prefer that we keep the functions unchanged for the time being?
Arguably it should be consistent with iovec imports, which return the length (or error). But it might be safer to just check access_ok() first and then truncate at least, vs what is there now.
Note that for 6.8 import_single_range() is gone as it was really just doing the same thing that import_ubuf() is. Any further changes in this area should CC Christian Brauner as well, as he has that staged in his tree.
As Jens Wiklander has proposed using iov_iter_ubuf() instead of import_ubuf(), should I propose a patch updating import_ubuf() and import_single_range()? Or would you prefer that we keep the functions unchanged for the time being?
Arguably it should be consistent with iovec imports, which return the length (or error). But it might be safer to just check access_ok() first and then truncate at least, vs what is there now.
Is the access_ok() check even needed when setting up an iov_iter? It is always checked again when the actual copy is done.
I looked at this a while back and couldn't see any code paths that relied on the early access_ok() check. Maybe it is historic?
David
- Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
Hi Arnaud,
On Mon, 4 Dec 2023 at 22:32, Arnaud POULIQUEN arnaud.pouliquen@foss.st.com wrote:
Hi,
On 12/4/23 17:40, Jens Axboe wrote:
On 12/4/23 9:36 AM, Jens Axboe wrote:
On 12/4/23 5:42 AM, Sumit Garg wrote:
IMO, access_ok() should be the first thing that import_ubuf() or import_single_range() should do, something as follows:
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8ff6824a1005..4aee0371824c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) {
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; if (unlikely(!access_ok(buf, len))) return -EFAULT;
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; iov_iter_ubuf(i, rw, buf, len); return 0;
Jens A., Al Viro,
Was there any particular reason which I am unaware of to perform access_ok() check on modified input length?
This change makes sense to me, and seems consistent with what is done elsewhere too.
For some reason I missed import_single_range(), which does it the same way as import_ubuf() currently does - cap the range before the access_ok() check. The vec variants sum as they go, but access_ok() before the range.
I think part of the issue here is that the single range imports return 0 for success and -ERROR otherwise. This means that the caller does not know if the full range was imported or not. OTOH, we always cap any data transfer at MAX_RW_COUNT, so may make more sense to fix up the caller here.
Should we limit to MAX_RW_COUNT or return an error? Seems to me that limiting could generate side effect later that could be not simple to debug.
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) {
if (len > MAX_RW_COUNT)
return -EFAULT; if (unlikely(!access_ok(buf, len))) return -EFAULT; iov_iter_ubuf(i, rw, buf, len); return 0;
or perhaps just remove the test as __access_ok() already tests that the size < TASK_SIZE
https://elixir.bootlin.com/linux/v6.7-rc4/source/include/asm-generic/access_...
It looks like there are predefined constraints for using import_ubuf() which doesn't properly match our needs. So let's directly use: iov_iter_ubuf() instead.
-Sumit
Thanks, Arnaud
Hi Sumit,
On 12/5/23 13:07, Sumit Garg wrote:
Hi Arnaud,
On Mon, 4 Dec 2023 at 22:32, Arnaud POULIQUEN arnaud.pouliquen@foss.st.com wrote:
Hi,
On 12/4/23 17:40, Jens Axboe wrote:
On 12/4/23 9:36 AM, Jens Axboe wrote:
On 12/4/23 5:42 AM, Sumit Garg wrote:
IMO, access_ok() should be the first thing that import_ubuf() or import_single_range() should do, something as follows:
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8ff6824a1005..4aee0371824c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) {
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; if (unlikely(!access_ok(buf, len))) return -EFAULT;
if (len > MAX_RW_COUNT)
len = MAX_RW_COUNT; iov_iter_ubuf(i, rw, buf, len); return 0;
Jens A., Al Viro,
Was there any particular reason which I am unaware of to perform access_ok() check on modified input length?
This change makes sense to me, and seems consistent with what is done elsewhere too.
For some reason I missed import_single_range(), which does it the same way as import_ubuf() currently does - cap the range before the access_ok() check. The vec variants sum as they go, but access_ok() before the range.
I think part of the issue here is that the single range imports return 0 for success and -ERROR otherwise. This means that the caller does not know if the full range was imported or not. OTOH, we always cap any data transfer at MAX_RW_COUNT, so may make more sense to fix up the caller here.
Should we limit to MAX_RW_COUNT or return an error? Seems to me that limiting could generate side effect later that could be not simple to debug.
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) {
if (len > MAX_RW_COUNT)
return -EFAULT; if (unlikely(!access_ok(buf, len))) return -EFAULT; iov_iter_ubuf(i, rw, buf, len); return 0;
or perhaps just remove the test as __access_ok() already tests that the size < TASK_SIZE
https://elixir.bootlin.com/linux/v6.7-rc4/source/include/asm-generic/access_...
It looks like there are predefined constraints for using import_ubuf() which doesn't properly match our needs. So let's directly use: iov_iter_ubuf() instead.
Yes, this seems a safer alternative. I will send a new version based on it.
Thanks, Arnaud
-Sumit
Thanks, Arnaud
op-tee@lists.trustedfirmware.org