// SPDX-License-Identifier: GPL-2.0
/*
 * Paravirtualized DMA operations that offer DMA inspection between
 * the guest and the host.
 */
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/dma-direct.h>
#include <linux/bitmap.h>
#include <linux/scatterlist.h>
#include <linux/pci.h>
#include <linux/dma-map-ops.h>
#include <linux/coiommu_dev.h>
#include <linux/coiommu.h>
#include <linux/iommu.h>
#include "coiommu.h"
#include "direct.h"
static struct coiommu *global_coiommu;
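/*
 * Return the index into a DTT page at @level for @pfn. The last level
 * consumes the low COIOMMU_PT_LEVEL_STRIDE bits of the pfn directly;
 * each upper level consumes a further COIOMMU_UPPER_LEVEL_STRIDE bits.
 */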
static inline unsigned int dtt_level_to_offset(unsigned long pfn,
unsigned int level)
{
unsigned int offset;
if (level == DTT_LAST_LEVEL)
		return pfn & COIOMMU_PT_LEVEL_MASK;
offset = COIOMMU_PT_LEVEL_STRIDE + (level - 2) * COIOMMU_UPPER_LEVEL_STRIDE;
return (pfn >> offset) & COIOMMU_UPPER_LEVEL_MASK;
}
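/*
 * Allocate a zeroed DTT page from the per-DTT page cache so the page
 * table can be extended from atomic context. When the current cache
 * runs dry the alloc work is queued to refill it, and an empty cache
 * falls back to a GFP_ATOMIC allocation.
 */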
static void *dtt_alloc_page(struct coiommu_dtt *dtt)
{
struct dtt_page_cache *c;
unsigned long flags;
void *obj = NULL;
spin_lock_irqsave(&dtt->alloc_lock, flags);
c = &dtt->cache[dtt->cur_cache];
if (!c->nobjs) {
		/*
		 * The cache is empty, allocate the page directly.
		 */
obj = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_ACCOUNT);
if (!obj)
pr_err("%s: coiommu failed to alloc dtt page\n",
__func__);
} else {
obj = c->objects[--c->nobjs];
if (!c->nobjs)
/*
* Prepare the next cache by waking up the alloc work
*/
kthread_queue_work(dtt->worker, &dtt->alloc_work);
}
spin_unlock_irqrestore(&dtt->alloc_lock, flags);
return obj;
}
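/*
 * Walk the DTT from the root down to the leaf entry of @pfn. When
 * @alloc is true, missing intermediate pages are allocated and
 * installed with cmpxchg64() so that concurrent walkers never link in
 * duplicate pages. Returns NULL if the last level cannot be reached.
 */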
static struct dtt_leaf_entry *pfn_to_dtt_pte(struct coiommu_dtt *dtt,
unsigned long pfn, bool alloc)
{
struct dtt_parent_entry *parent_pte;
unsigned int index;
struct dtt_leaf_entry *leaf_pte;
unsigned int level = dtt->level;
void *pt = (void *)dtt->root;
u64 pteval;
while (level != DTT_LAST_LEVEL) {
index = dtt_level_to_offset(pfn, level);
parent_pte = (struct dtt_parent_entry *)pt + index;
if (!parent_pte_present(parent_pte)) {
if (!alloc)
break;
pt = dtt_alloc_page(dtt);
if (!pt)
break;
pteval = parent_pte_value(pt);
if (cmpxchg64(&parent_pte->val, 0ULL, pteval))
/* Someone else set it, free this one */
free_page((unsigned long)pt);
else
atomic_inc(&dtt->pages);
}
pt = phys_to_virt(parent_pte_addr(parent_pte));
level--;
}
if (level > DTT_LAST_LEVEL) {
pr_err("coiommu: DTT %s failed at level %d for pfn 0x%lx\n",
alloc ? "alloc" : "absent", level, pfn);
return NULL;
}
index = dtt_level_to_offset(pfn, DTT_LAST_LEVEL);
leaf_pte = (struct dtt_leaf_entry *)pt + index;
return leaf_pte;
}
static bool is_page_pinned(struct coiommu_dtt *dtt, unsigned long pfn)
{
struct dtt_leaf_entry *leaf_pte = pfn_to_dtt_pte(dtt, pfn, false);
if (leaf_pte == NULL)
return false;
return coiommu_test_flag((1 << DTTE_PINNED_FLAG), &leaf_pte->dtte);
}
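/*
 * Drop one reference from the map count held in the leaf DTTE. If the
 * count reaches zero on an error-unwind path (@clear_accessed), the
 * accessed flag set by mark_pfn() is cleared again as well.
 */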
static void unmark_pfn(struct dtt_leaf_entry *leaf_pte, bool clear_accessed)
{
if (!(atomic_read(&leaf_pte->dtte) & DTTE_MAP_CNT_MASK)) {
pr_err("%s: coiomu: map count already zero, leaf_pte 0x%llx\n",
__func__, (u64)leaf_pte);
return;
}
if (!(atomic_dec_return(&leaf_pte->dtte) & DTTE_MAP_CNT_MASK)) {
if (unlikely(clear_accessed))
			/*
			 * clear_accessed is only true in the error handling
			 * path, e.g. when pinning a page failed and earlier
			 * operations need to be reversed, so this happens
			 * rarely.
			 * If the page is pinned successfully by another thread
			 * right before the map count is decreased here, the
			 * accessed flag is not cleared, which is expected.
			 * If it is pinned successfully by another thread right
			 * after the map count is decreased here, the accessed
			 * flag is still cleared. That does not cause any harm,
			 * it just perturbs access tracking a little.
			 */
coiommu_clear_flag((1 << DTTE_ACCESSED_FLAG),
&leaf_pte->dtte);
}
}
static void unmark_pfns(struct coiommu_dtt *dtt, unsigned long pfn,
unsigned long nr_pages, bool clear_accessed)
{
struct dtt_leaf_entry *leaf_pte = NULL;
unsigned long count = 0;
unsigned int index = 0;
for (; count < nr_pages; count++) {
if (!leaf_pte || index > COIOMMU_PT_LEVEL_MASK) {
leaf_pte = pfn_to_dtt_pte(dtt, pfn + count, false);
if (leaf_pte == NULL) {
pr_err("%s: coiommu: pfn 0x%lx pte is NULL\n",
__func__, pfn + count);
				/*
				 * The entries in the same page table page
				 * should all be NULL, so just skip all of
				 * them.
				 */
index = dtt_level_to_offset(pfn + count,
DTT_LAST_LEVEL);
count += COIOMMU_PT_LEVEL_MASK - index;
continue;
}
index = dtt_level_to_offset(pfn + count,
DTT_LAST_LEVEL);
} else
leaf_pte += 1;
unmark_pfn(leaf_pte, clear_accessed);
index++;
}
}
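/*
 * Take one reference on the map count in the leaf DTTE and set the
 * accessed flag. @pinned reports whether the page is already pinned,
 * i.e. whether the caller still has to issue a pin request for it.
 */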
static int mark_pfn(struct coiommu_dtt *dtt,
struct dtt_leaf_entry *leaf_pte,
bool *pinned)
{
unsigned long flags;
unsigned int dtte;
local_irq_save(flags);
dtte = atomic_inc_return(&leaf_pte->dtte);
if ((dtte & DTTE_MAP_CNT_MASK) > dtt->max_map_count) {
pr_err("%s: coiommu: %d maps already done, leaf_pte 0x%llx\n",
__func__, (dtte & DTTE_MAP_CNT_MASK), (u64)leaf_pte);
atomic_dec(&leaf_pte->dtte);
local_irq_restore(flags);
return -EINVAL;
}
local_irq_restore(flags);
coiommu_set_flag((1 << DTTE_ACCESSED_FLAG), &leaf_pte->dtte);
if (pinned)
*pinned = !!coiommu_test_flag((1 << DTTE_PINNED_FLAG),
&leaf_pte->dtte);
return 0;
}
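/*
 * Mark a contiguous range of pfns. Pages that are not pinned yet are
 * collected in @pin_info so the caller can submit one batched pin
 * request. On failure, all marks taken so far are rolled back.
 */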
static int mark_pfns(struct coiommu_dtt *dtt, unsigned long pfn,
unsigned long nr_pages, struct pin_pages_info *pin_info)
{
struct dtt_leaf_entry *leaf_pte = NULL;
unsigned long count = 0;
unsigned int index = 0;
bool pinned;
int ret = 0;
for (count = 0; count < nr_pages; count++) {
if (!leaf_pte || index > COIOMMU_PT_LEVEL_MASK) {
leaf_pte = pfn_to_dtt_pte(dtt, pfn + count, true);
if (leaf_pte == NULL) {
pr_err("%s: coiommu: pfn 0x%lx pte is NULL\n",
__func__, pfn);
ret = -EINVAL;
goto out;
}
index = dtt_level_to_offset(pfn + count, DTT_LAST_LEVEL);
} else
leaf_pte += 1;
ret = mark_pfn(dtt, leaf_pte, &pinned);
if (ret)
goto out;
if (!pinned) {
pin_info->pfn[pin_info->nr_pages] = pfn + count;
pin_info->nr_pages++;
}
index++;
}
return 0;
out:
unmark_pfns(dtt, pfn, count, true);
return ret;
}
static inline unsigned long get_aligned_nrpages(phys_addr_t phys_addr,
size_t size)
{
return PAGE_ALIGN((phys_addr & (PAGE_SIZE - 1)) + size) >> PAGE_SHIFT;
}
static inline unsigned short get_pci_device_id(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
return PCI_DEVID(pdev->bus->number, pdev->devfn);
}
static void unmark_dma_addr(struct device *dev, size_t size,
dma_addr_t dma_addr)
{
	struct coiommu_dtt *dtt;
	phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
	unsigned long pfn = phys_addr >> PAGE_SHIFT;
	unsigned long nr_pages = get_aligned_nrpages(phys_addr, size);
	if (unlikely(!global_coiommu))
		return;
	dtt = &global_coiommu->dtt;
read_lock(&dtt->lock);
if (likely(dtt->root))
unmark_pfns(dtt, pfn, nr_pages, false);
read_unlock(&dtt->lock);
}
static void unmark_sg_pfns(struct coiommu_dtt *dtt,
struct scatterlist *sgl,
int nents, bool clear_accessed)
{
struct scatterlist *sg;
phys_addr_t phys_addr;
unsigned long pfn;
unsigned long nr_pages;
int i;
for_each_sg(sgl, sg, nents, i) {
phys_addr = sg->dma_address;
pfn = phys_addr >> PAGE_SHIFT;
nr_pages = get_aligned_nrpages(phys_addr, sg->length);
read_lock(&dtt->lock);
if (unlikely(!dtt->root)) {
read_unlock(&dtt->lock);
return;
}
unmark_pfns(dtt, pfn, nr_pages, clear_accessed);
read_unlock(&dtt->lock);
}
}
static void unmark_sg(struct scatterlist *sgl,
int nents, bool clear_accessed)
{
	if (likely(global_coiommu))
		unmark_sg_pfns(&global_coiommu->dtt, sgl, nents, clear_accessed);
}
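/*
 * Ask the coiommu backend to pin a single pfn for device @bdf and
 * verify that the pinned flag shows up in the DTT afterwards.
 */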
static int pin_page(struct coiommu_dtt *dtt, unsigned long pfn,
unsigned short bdf)
{
struct coiommu *coiommu = dtt_to_coiommu(dtt);
int ret;
ret = coiommu->dev_ops->execute_request(coiommu->dev, pfn, bdf);
if (ret)
return ret;
if (!is_page_pinned(dtt, pfn)) {
pr_err("%s: coiommu pin pfn 0x%lx failed\n", __func__, pfn);
return -EFAULT;
}
return 0;
}
static int pin_page_list(struct coiommu_dtt *dtt, struct pin_pages_info *pin_info)
{
struct coiommu *coiommu = dtt_to_coiommu(dtt);
int ret, count;
ret = coiommu->dev_ops->execute_requests(coiommu->dev, pin_info);
if (ret)
return ret;
for (count = 0; count < pin_info->nr_pages; count++) {
if (!is_page_pinned(dtt, pin_info->pfn[count])) {
pr_err("%s: coiommu pin pfn 0x%llx failed\n",
__func__, pin_info->pfn[count]);
return -EFAULT;
}
}
return 0;
}
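/*
 * Fast path for single-page mappings: mark the pfn and, if it is not
 * pinned yet, issue an individual pin request, all under the DTT read
 * lock so the table cannot be torn down in the middle.
 */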
static int pin_and_mark_pfn(struct device *dev, unsigned long pfn)
{
struct dtt_leaf_entry *leaf_pte;
	unsigned short bdf = get_pci_device_id(dev);
	struct coiommu_dtt *dtt;
	int ret = 0;
	bool pinned;
	if (!global_coiommu)
		return -ENODEV;
	dtt = &global_coiommu->dtt;
read_lock(&dtt->lock);
if (unlikely(!dtt->root)) {
ret = -ENODEV;
goto out;
}
leaf_pte = pfn_to_dtt_pte(dtt, pfn, true);
if (leaf_pte == NULL) {
pr_err("%s: coiommu: pfn 0x%lx pte is NULL\n", __func__, pfn);
ret = -EINVAL;
goto out;
}
ret = mark_pfn(dtt, leaf_pte, &pinned);
if (ret)
goto out;
if (!pinned) {
ret = pin_page(dtt, pfn, bdf);
if (unlikely(ret))
unmark_pfn(leaf_pte, true);
}
out:
read_unlock(&dtt->lock);
return ret;
}
static int pin_and_mark_pfns(struct device *dev, unsigned long start_pfn,
unsigned long nr_pages)
{
	unsigned short bdf = get_pci_device_id(dev);
	struct coiommu_dtt *dtt;
	struct pin_pages_info *pin_info;
	int ret;
	if (nr_pages == 1)
		return pin_and_mark_pfn(dev, start_pfn);
	if (!global_coiommu)
		return -ENODEV;
	dtt = &global_coiommu->dtt;
pin_info = kzalloc(sizeof(struct pin_pages_info) +
nr_pages * sizeof(unsigned long),
GFP_ATOMIC);
if (!pin_info)
return -ENOMEM;
read_lock(&dtt->lock);
if (unlikely(!dtt->root)) {
ret = -ENODEV;
goto out;
}
ret = mark_pfns(dtt, start_pfn, nr_pages, pin_info);
if (ret)
goto out;
if (pin_info->nr_pages > 0) {
pin_info->bdf = bdf;
ret = pin_page_list(dtt, pin_info);
if (unlikely(ret))
			/*
			 * Note - in case of pin failures, all pfns required
			 * for this DMA mapping fail, which means none of them
			 * will take part in the DMA operation. Hence their
			 * map counts must be decremented.
			 */
unmark_pfns(dtt, start_pfn, nr_pages, true);
}
out:
read_unlock(&dtt->lock);
kfree(pin_info);
return ret;
}
static int pin_and_mark_dma_addr(struct device *dev, size_t size,
dma_addr_t dma_addr)
{
phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
unsigned long nr_pages = get_aligned_nrpages(phys_addr, size);
unsigned long pfn = phys_addr >> PAGE_SHIFT;
int ret;
ret = pin_and_mark_pfns(dev, pfn, nr_pages);
if (unlikely(ret))
dev_err(dev, "%s: coiommu failed to pin DMA buffer: %d\n",
__func__, ret);
return ret;
}
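/*
 * Mark every page covered by the scatterlist and pin all not yet
 * pinned pages with a single batched request. Any failure rolls the
 * already taken marks back again.
 */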
static int pin_and_mark_sg_list(struct device *dev,
struct scatterlist *sgl,
int nents)
{
	unsigned short bdf = get_pci_device_id(dev);
	struct coiommu_dtt *dtt;
	struct scatterlist *sg;
	unsigned long nr_pages = 0;
	phys_addr_t phys_addr;
	unsigned long pfn;
	struct pin_pages_info *pin_info = NULL;
	int i, ret = 0;
	if (!global_coiommu)
		return -ENODEV;
	dtt = &global_coiommu->dtt;
for_each_sg(sgl, sg, nents, i) {
phys_addr = sg->dma_address;
nr_pages += get_aligned_nrpages(phys_addr, sg->length);
}
pin_info = kzalloc(sizeof(struct pin_pages_info) +
nr_pages * sizeof(unsigned long), GFP_ATOMIC);
if (!pin_info)
return -ENOMEM;
read_lock(&dtt->lock);
if (unlikely(!dtt->root)) {
ret = -ENODEV;
goto out;
}
for_each_sg(sgl, sg, nents, i) {
phys_addr = sg->dma_address;
pfn = phys_addr >> PAGE_SHIFT;
nr_pages = get_aligned_nrpages(phys_addr, sg->length);
ret = mark_pfns(dtt, pfn, nr_pages, pin_info);
if (ret) {
unmark_sg_pfns(dtt, sgl, i, true);
goto out;
}
}
if (pin_info->nr_pages > 0) {
pin_info->bdf = bdf;
ret = pin_page_list(dtt, pin_info);
if (unlikely(ret))
			/*
			 * Note - in case of pin failures, all pfns required
			 * for this DMA mapping fail, which means none of them
			 * will take part in the DMA operation. Hence their
			 * map counts must be decremented.
			 */
unmark_sg_pfns(dtt, sgl, nents, true);
}
out:
read_unlock(&dtt->lock);
kfree(pin_info);
return ret;
}
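/*
 * dma_map_ops wrappers: each map/alloc path delegates to dma-direct
 * first and then pins and marks the pages backing the mapping in the
 * DTT; the unmap/free paths drop the marks again afterwards.
 */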
static void *coiommu_alloc(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t gfp,
unsigned long attrs)
{
void *cpu_addr = dma_direct_alloc(dev, size, dma_addr, gfp, attrs);
if (!cpu_addr) {
dev_err(dev, "%s: failed\n", __func__);
return NULL;
}
if (pin_and_mark_dma_addr(dev, size, *dma_addr))
goto out_free;
return cpu_addr;
out_free:
dma_direct_free(dev, size, cpu_addr, *dma_addr, attrs);
return NULL;
}
static void coiommu_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
dma_direct_free(dev, size, cpu_addr, dma_addr, attrs);
unmark_dma_addr(dev, size, dma_addr);
}
static struct page *coiommu_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle,
enum dma_data_direction dir,
gfp_t gfp)
{
struct page *page = dma_direct_alloc_pages(dev, size, dma_handle,
dir, gfp);
if (!page) {
dev_err(dev, "%s: failed\n", __func__);
return NULL;
}
if (pin_and_mark_dma_addr(dev, size, *dma_handle))
goto out_free;
return page;
out_free:
dma_direct_free_pages(dev, size, page, *dma_handle, dir);
return NULL;
}
static void coiommu_free_pages(struct device *dev, size_t size,
struct page *page, dma_addr_t dma_handle,
enum dma_data_direction dir)
{
dma_direct_free_pages(dev, size, page, dma_handle, dir);
unmark_dma_addr(dev, size, dma_handle);
}
static dma_addr_t coiommu_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
dma_addr_t dma_addr = dma_direct_map_page(dev, page, offset,
size, dir, attrs);
if (dma_addr == DMA_MAPPING_ERROR) {
dev_err(dev, "%s: failed\n", __func__);
return dma_addr;
}
if (pin_and_mark_dma_addr(dev, size, dma_addr))
goto out_unmap;
return dma_addr;
out_unmap:
dma_direct_unmap_page(dev, dma_addr, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
return DMA_MAPPING_ERROR;
}
static void coiommu_unmap_page(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
dma_direct_unmap_page(dev, addr, size, dir, attrs);
unmark_dma_addr(dev, size, addr);
}
static int coiommu_map_sg(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir,
unsigned long attrs)
{
nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs);
if (!nents) {
dev_err(dev, "%s: failed\n", __func__);
return 0;
}
if (pin_and_mark_sg_list(dev, sgl, nents))
goto out_unmap;
return nents;
out_unmap:
dma_direct_unmap_sg(dev, sgl, nents, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
return 0;
}
static void coiommu_unmap_sg(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir,
unsigned long attrs)
{
dma_direct_unmap_sg(dev, sgl, nents, dir, attrs);
unmark_sg(sgl, nents, false);
}
static const struct dma_map_ops coiommu_ops = {
.alloc = coiommu_alloc,
.free = coiommu_free,
.alloc_pages = coiommu_alloc_pages,
.free_pages = coiommu_free_pages,
.mmap = dma_direct_mmap,
.get_sgtable = dma_direct_get_sgtable,
.map_page = coiommu_map_page,
.unmap_page = coiommu_unmap_page,
.map_sg = coiommu_map_sg,
.unmap_sg = coiommu_unmap_sg,
.map_resource = dma_direct_map_resource,
.sync_single_for_cpu = dma_direct_sync_single_for_cpu,
.sync_single_for_device = dma_direct_sync_single_for_device,
.sync_sg_for_cpu = dma_direct_sync_sg_for_cpu,
.sync_sg_for_device = dma_direct_sync_sg_for_device,
.dma_supported = dma_direct_supported,
.get_required_mask = dma_direct_get_required_mask,
.max_mapping_size = dma_direct_max_mapping_size,
};
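/*
 * Number of DTT levels needed to cover MAX_PHYSMEM_BITS worth of pfns:
 * one leaf level of COIOMMU_PT_LEVEL_STRIDE bits plus as many
 * COIOMMU_UPPER_LEVEL_STRIDE-bit upper levels as required.
 */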
static inline unsigned int get_dtt_level(void)
{
unsigned int pfn_width;
pfn_width = MAX_PHYSMEM_BITS - PAGE_SHIFT;
if (pfn_width <= COIOMMU_PT_LEVEL_STRIDE)
return 1;
return DIV_ROUND_UP((pfn_width - COIOMMU_PT_LEVEL_STRIDE),
COIOMMU_UPPER_LEVEL_STRIDE) + 1;
}
static void dtt_free(void *pt, unsigned int level)
{
struct dtt_parent_entry *pte;
u64 phys;
int i;
	/*
	 * The last level contains the DMA tracking entries, which do not
	 * point to any physical memory, so nothing needs to be freed
	 * except the page itself.
	 */
if (level == DTT_LAST_LEVEL)
goto free;
for (i = 0; i < 1 << COIOMMU_UPPER_LEVEL_STRIDE; i++) {
pte = (struct dtt_parent_entry *)pt + i;
if (!parent_pte_present(pte))
continue;
phys = parent_pte_addr(pte);
dtt_free(phys_to_virt(phys), level - 1);
}
free:
free_page((unsigned long)pt);
}
static void dtt_root_free(struct coiommu_dtt *dtt)
{
dtt_free((void *)dtt->root, dtt->level);
dtt->root = NULL;
dtt->level = 0;
}
static int populate_dtt_page_cache(struct dtt_page_cache *c,
int count, gfp_t gfp_mask)
{
void *obj;
while (c->nobjs < count) {
obj = (void *)get_zeroed_page(gfp_mask);
if (!obj)
break;
c->objects[c->nobjs++] = obj;
}
return c->nobjs;
}
static void dtt_page_cache_free(struct coiommu_dtt *dtt)
{
struct dtt_page_cache *c;
int i;
for (i = 0; i < ARRAY_SIZE(dtt->cache); i++) {
c = &dtt->cache[i];
while (c->nobjs)
free_page((unsigned long)c->objects[--c->nobjs]);
}
}
static int dtt_page_cache_alloc(struct coiommu_dtt *dtt)
{
int i;
for (i = 0; i < ARRAY_SIZE(dtt->cache); i++) {
if (!populate_dtt_page_cache(&dtt->cache[i],
COIOMMU_INFO_NR_OBJS, GFP_KERNEL_ACCOUNT)) {
goto free;
}
}
return 0;
free:
dtt_page_cache_free(dtt);
return -ENOMEM;
}
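/*
 * Worker that refills the DTT page cache: flip cur_cache so that
 * dtt_alloc_page() consumes the other cache while the one that just
 * ran dry is repopulated with sleeping allocations.
 */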
static void alloc_dtt_pages(struct kthread_work *work)
{
struct coiommu_dtt *dtt =
container_of(work, struct coiommu_dtt, alloc_work);
int prev_cache = dtt->cur_cache;
unsigned long flags;
int nobjs;
spin_lock_irqsave(&dtt->alloc_lock, flags);
dtt->cur_cache = !dtt->cur_cache;
spin_unlock_irqrestore(&dtt->alloc_lock, flags);
nobjs = populate_dtt_page_cache(&dtt->cache[prev_cache],
COIOMMU_INFO_NR_OBJS, GFP_KERNEL_ACCOUNT);
if (nobjs != COIOMMU_INFO_NR_OBJS)
pr_warn("%s: coiommu: cache%d supposed to get %d pages but got %d\n",
__func__, prev_cache, COIOMMU_INFO_NR_OBJS, nobjs);
}
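/*
 * Install the coiommu dma_map_ops on @dev if its BDF is listed as a
 * coiommu endpoint. If the endpoint probes before the coiommu driver
 * registered its dev_ops, the probe is deferred.
 */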
static int coiommu_setup_endpoint(struct device *dev)
{
struct coiommu *coiommu = NULL;
int i;
if (!global_coiommu || !global_coiommu->endpoints)
return 0;
for (i = 0; i < global_coiommu->ep_count; i++) {
if (get_pci_device_id(dev) == global_coiommu->endpoints[i]) {
coiommu = global_coiommu;
break;
}
}
	/*
	 * The device is not behind the coIOMMU, so there is nothing to
	 * set up.
	 */
if (!coiommu)
return 0;
if (!coiommu->dev_ops) {
		dev_info(dev, "%s: probed earlier than coiommu, deferring\n", __func__);
return -EPROBE_DEFER;
}
set_dma_ops(dev, &coiommu_ops);
return 0;
}
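/*
 * Shrinker integration: count_objects reports the number of DTT pages,
 * scan_objects walks the tree and frees page-table pages that are
 * completely zero while unpinning is parked in the backend.
 */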
static unsigned long
dtt_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
struct coiommu_dtt *dtt = container_of(shrink, struct coiommu_dtt,
dtt_shrinker);
return atomic_read(&dtt->pages);
}
static unsigned int dtt_shrink(struct coiommu_dtt *dtt,
struct dtt_parent_entry *parentpt,
void *pt, unsigned int level,
bool *pt_freed)
{
unsigned int free_count = 0;
struct dtt_parent_entry *pte;
bool has_child = false;
u64 phys;
int i;
if (level != DTT_LAST_LEVEL) {
for (i = 0; i < 1 << COIOMMU_UPPER_LEVEL_STRIDE; i++) {
bool child_freed = false;
pte = (struct dtt_parent_entry *)pt + i;
if (!parent_pte_present(pte))
continue;
phys = parent_pte_addr(pte);
free_count += dtt_shrink(dtt, pte, phys_to_virt(phys),
level - 1, &child_freed);
has_child |= !child_freed;
}
}
if (!has_child && parentpt) {
unsigned long flags;
write_lock_irqsave(&dtt->lock, flags);
if (!memcmp(pt, dtt->zero_page, PAGE_SIZE)) {
free_page((unsigned long)pt);
atomic_dec(&dtt->pages);
parentpt->val = 0;
if (pt_freed)
*pt_freed = true;
free_count += 1;
}
write_unlock_irqrestore(&dtt->lock, flags);
}
return free_count;
}
static unsigned long
dtt_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
struct coiommu *coiommu = container_of(shrink, struct coiommu, dtt.dtt_shrinker);
struct coiommu_dtt *dtt = &coiommu->dtt;
unsigned int total = atomic_read(&dtt->pages);
unsigned int free;
coiommu->dev_ops->park_unpin(coiommu->dev, true);
free = dtt_shrink(dtt, NULL, (void *)dtt->root, dtt->level, NULL);
coiommu->dev_ops->park_unpin(coiommu->dev, false);
if (free)
pr_info("coiommu: DTT pages total %u free %u\n", total, free);
return free ? free : SHRINK_STOP;
}
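/*
 * Allocate the DTT root and the page caches, start the cache refill
 * worker and hand the root physical address and the level back through
 * @dtt_addr and @dtt_level.
 */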
int coiommu_enable_dtt(u64 *dtt_addr, u64 *dtt_level)
{
struct coiommu_dtt *dtt;
int ret;
if (!global_coiommu) {
pr_err("%s: coiommu not exists\n", __func__);
return -EINVAL;
}
dtt = &global_coiommu->dtt;
dtt->root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
if (!dtt->root)
return -ENOMEM;
dtt->level = get_dtt_level();
ret = dtt_page_cache_alloc(dtt);
if (ret)
goto free_root;
dtt->cur_cache = 0;
dtt->worker = kthread_create_worker(0, "coiommu_pagecache_alloc");
if (IS_ERR(dtt->worker)) {
ret = PTR_ERR(dtt->worker);
goto free_page_cache;
}
kthread_init_work(&dtt->alloc_work, alloc_dtt_pages);
atomic_set(&dtt->pages, 0);
if (dtt_addr)
*dtt_addr = (u64)__pa(dtt->root);
if (dtt_level)
*dtt_level = (u64)dtt->level;
	/*
	 * The same guest physical page may be mapped at the same time by
	 * different CPUs, so the map_count can be increased concurrently
	 * by multiple CPU threads (see mark_pfn). To prevent map_count
	 * from exceeding DTTE_MAP_CNT_MASK, set the maximum map_count to
	 * DTTE_MAP_CNT_MASK - num_possible_cpus().
	 */
dtt->max_map_count = DTTE_MAP_CNT_MASK - num_possible_cpus();
pr_info("%s: coiommu max map_count: 0x%x\n",
__func__, dtt->max_map_count);
return 0;
free_page_cache:
dtt_page_cache_free(dtt);
free_root:
free_page((unsigned long)dtt->root);
dtt->root = NULL;
pr_err("%s: failed with error %d\n", __func__, ret);
return ret;
}
void coiommu_disable_dtt(void)
{
struct coiommu_dtt *dtt;
unsigned long flags;
if (!global_coiommu)
return;
dtt = &global_coiommu->dtt;
if (!dtt->root)
return;
write_lock_irqsave(&dtt->lock, flags);
kthread_destroy_worker(dtt->worker);
dtt_page_cache_free(dtt);
dtt_root_free(dtt);
write_unlock_irqrestore(&dtt->lock, flags);
}
int coiommu_setup_dev_ops(const struct coiommu_dev_ops *ops, void *dev)
{
struct coiommu_dtt *dtt;
if (!ops)
return -EINVAL;
if (!global_coiommu)
return -ENODEV;
	/*
	 * If this is not the first time the dev ops are set up, the
	 * coiommu is already owned by the driver and we only get here
	 * because the coiommu device was removed and re-probed. That
	 * cannot bring the coiommu back, because the removal already
	 * cleared the DTT holding the previous mapping and pinning
	 * state.
	 */
if (global_coiommu->dev_ops)
return -EBUSY;
global_coiommu->dev_ops = ops;
global_coiommu->dev = dev;
if (!ops->park_unpin)
return 0;
dtt = &global_coiommu->dtt;
dtt->zero_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
if (!dtt->zero_page)
return -ENOMEM;
dtt->dtt_shrinker.count_objects = dtt_shrink_count;
dtt->dtt_shrinker.scan_objects = dtt_shrink_scan;
dtt->dtt_shrinker.seeks = DEFAULT_SEEKS;
return register_shrinker(&dtt->dtt_shrinker);
}
int coiommu_configure(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
if (pdev->vendor == PCI_VENDOR_ID_COIOMMU &&
pdev->device == PCI_DEVICE_ID_COIOMMU)
return 0;
return coiommu_setup_endpoint(dev);
}
static void coiommu_set_endpoints(struct coiommu *coiommu,
unsigned short ep_count,
unsigned short *endpoints)
{
if (!endpoints)
return;
coiommu->endpoints = kcalloc(ep_count,
sizeof(unsigned short), GFP_KERNEL);
if (!coiommu->endpoints)
return;
memcpy(coiommu->endpoints, endpoints,
ep_count * sizeof(unsigned short));
coiommu->ep_count = ep_count;
}
void coiommu_init(unsigned short ep_count, unsigned short *endpoints)
{
	/*
	 * If the coiommu is already created, this is not the first
	 * init call; keep re-using the existing instance.
	 */
if (global_coiommu) {
pr_warn("%s: coiommu is already initialized\n", __func__);
return;
}
global_coiommu = kzalloc(sizeof(struct coiommu), GFP_KERNEL);
if (!global_coiommu)
return;
rwlock_init(&global_coiommu->dtt.lock);
spin_lock_init(&global_coiommu->dtt.alloc_lock);
coiommu_set_endpoints(global_coiommu, ep_count, endpoints);
}