/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_error.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_ioctl.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_icache.h"
#include "xfs_pnfs.h"
#include "xfs_iomap.h"

#include <linux/dcache.h>
#include <linux/falloc.h>
#include <linux/pagevec.h>
#include <linux/backing-dev.h>

static const struct vm_operations_struct xfs_file_vm_ops;

/*
 * Locking primitives for read and write IO paths to ensure we consistently use
 * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
 */
static inline void
xfs_rw_ilock(
	struct xfs_inode	*ip,
	int			type)
{
	if (type & XFS_IOLOCK_EXCL)
		inode_lock(VFS_I(ip));
	xfs_ilock(ip, type);
}

static inline void
xfs_rw_iunlock(
	struct xfs_inode	*ip,
	int			type)
{
	xfs_iunlock(ip, type);
	if (type & XFS_IOLOCK_EXCL)
		inode_unlock(VFS_I(ip));
}

static inline void
xfs_rw_ilock_demote(
	struct xfs_inode	*ip,
	int			type)
{
	xfs_ilock_demote(ip, type);
	if (type & XFS_IOLOCK_EXCL)
		inode_unlock(VFS_I(ip));
}

/*
 * Clear the specified ranges to zero through either the pagecache or DAX.
 * Holes and unwritten extents will be left as-is as they already are zeroed.
 */
int
xfs_iozero(
	struct xfs_inode	*ip,
	loff_t			pos,
	size_t			count)
{
	return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
}

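/*
 * Update the inode's preallocation state: set or clear XFS_DIFLAG_PREALLOC
 * as requested, clear the setuid/setgid bits and bump the timestamps unless
 * the change is marked invisible, and log it all in a small transaction that
 * is made synchronous when XFS_PREALLOC_SYNC is requested.
 */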
int
xfs_update_prealloc_flags(
	struct xfs_inode	*ip,
	enum xfs_prealloc_flags	flags)
{
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
			0, 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	if (!(flags & XFS_PREALLOC_INVISIBLE)) {
		VFS_I(ip)->i_mode &= ~S_ISUID;
		if (VFS_I(ip)->i_mode & S_IXGRP)
			VFS_I(ip)->i_mode &= ~S_ISGID;
		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	}

	if (flags & XFS_PREALLOC_SET)
		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
	if (flags & XFS_PREALLOC_CLEAR)
		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	if (flags & XFS_PREALLOC_SYNC)
		xfs_trans_set_sync(tp);
	return xfs_trans_commit(tp);
}

/*
 * Fsync operations on directories are much simpler than on regular files,
 * as there is no file data to flush, and thus also no need for explicit
 * cache flush operations, and there are no non-transaction metadata updates
 * on directories either.
 */
STATIC int
xfs_dir_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct xfs_inode	*ip = XFS_I(file->f_mapping->host);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_lsn_t		lsn = 0;

	trace_xfs_dir_fsync(ip);

	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip))
		lsn = ip->i_itemp->ili_last_lsn;
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (!lsn)
		return 0;
	return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
}

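/*
 * Fsync for regular files: write back and wait on dirty pagecache data in the
 * requested range, force the log up to the last LSN that dirtied the inode,
 * and issue block device cache flushes where the log force alone is not
 * enough to get the data to stable storage.
 */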
STATIC int
xfs_file_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	int			error = 0;
	int			log_flushed = 0;
	xfs_lsn_t		lsn = 0;

	trace_xfs_file_fsync(ip);

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	if (mp->m_flags & XFS_MOUNT_BARRIER) {
		/*
		 * If we have an RT and/or log subvolume we need to make sure
		 * to flush the write cache of the device used for file data
		 * first.  This is to ensure newly written file data makes
		 * it to disk before logging the new inode size in case of
		 * an extending write.
		 */
		if (XFS_IS_REALTIME_INODE(ip))
			xfs_blkdev_issue_flush(mp->m_rtdev_targp);
		else if (mp->m_logdev_targp != mp->m_ddev_targp)
			xfs_blkdev_issue_flush(mp->m_ddev_targp);
	}

	/*
	 * All metadata updates are logged, which means that we just have to
	 * flush the log up to the latest LSN that touched the inode. If we have
	 * concurrent fsync/fdatasync() calls, we need them to all block on the
	 * log force before we clear the ili_fsync_fields field. This ensures
	 * that we don't get a racing sync operation that does not wait for the
	 * metadata to hit the journal before returning. If we race with
	 * clearing the ili_fsync_fields, then all that will happen is the log
	 * force will do nothing as the lsn will already be on disk. We can't
	 * race with setting ili_fsync_fields because that is done under
	 * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
	 * until after the ili_fsync_fields is cleared.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip)) {
		if (!datasync ||
		    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
			lsn = ip->i_itemp->ili_last_lsn;
	}

	if (lsn) {
		error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
		ip->i_itemp->ili_fsync_fields = 0;
	}
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/*
	 * If we only have a single device, and the log force above was
	 * a no-op, we might have to flush the data device cache here.
	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
	 * an already allocated file and thus do not have any metadata to
	 * commit.
	 */
	if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
	    mp->m_logdev_targp == mp->m_ddev_targp &&
	    !XFS_IS_REALTIME_INODE(ip) &&
	    !log_flushed)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	return error;
}

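/*
 * ->read_iter entry point.  Buffered reads go through the pagecache via
 * generic_file_read_iter() under a shared IO lock.  For direct IO over a file
 * with cached pages, the IO lock is taken exclusively just long enough to
 * flush and invalidate the pagecache, then demoted back to shared for the
 * read itself.
 */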
STATIC ssize_t
xfs_file_read_iter(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	size_t			size = iov_iter_count(to);
	ssize_t			ret = 0;
	int			ioflags = 0;
	xfs_fsize_t		n;
	loff_t			pos = iocb->ki_pos;

	XFS_STATS_INC(mp, xs_read_calls);

	if (unlikely(iocb->ki_flags & IOCB_DIRECT))
		ioflags |= XFS_IO_ISDIRECT;
	if (file->f_mode & FMODE_NOCMTIME)
		ioflags |= XFS_IO_INVIS;

	if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(ip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;
		/* DIO must be aligned to device logical sector size */
		if ((pos | size) & target->bt_logical_sectormask) {
			if (pos == i_size_read(inode))
				return 0;
			return -EINVAL;
		}
	}

	n = mp->m_super->s_maxbytes - pos;
	if (n <= 0 || size == 0)
		return 0;

	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * Locking is a bit tricky here. If we take an exclusive lock for direct
	 * IO, we effectively serialise all new concurrent read IO to this file
	 * and block it behind IO that is currently in progress because IO in
	 * progress holds the IO lock shared. We only need to hold the lock
	 * exclusive to blow away the page cache, so only take lock exclusively
	 * if the page cache needs invalidation. This allows the normal direct
	 * IO case of no page cache pages to proceed concurrently without
	 * serialisation.
	 */
	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);

		/*
		 * The generic dio code only flushes the range of the particular
		 * I/O. Because we take an exclusive lock here, this whole
		 * sequence is considerably more expensive for us. This has a
		 * noticeable performance impact for any file with cached pages,
		 * even when outside of the range of the particular I/O.
		 *
		 * Hence, amortize the cost of the lock against a full file
		 * flush and reduce the chances of repeated iolock cycles going
		 * forward.
		 */
		if (inode->i_mapping->nrpages) {
			ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
			if (ret) {
				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
				return ret;
			}

			/*
			 * Invalidate whole pages. This can return an error if
			 * we fail to invalidate a page, but this should never
			 * happen on XFS. Warn if it does fail.
			 */
			ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
			WARN_ON_ONCE(ret);
			ret = 0;
		}
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
	}

	trace_xfs_file_read(ip, size, pos, ioflags);

	ret = generic_file_read_iter(iocb, to);
	if (ret > 0)
		XFS_STATS_ADD(mp, xs_read_bytes, ret);

	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
	return ret;
}

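/*
 * Splice data from a file into a pipe.  DAX inodes cannot use the
 * pagecache-based splice path, so they are punted to the VFS default
 * implementation; everything else goes through generic_file_splice_read()
 * under a shared IO lock.
 */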
STATIC ssize_t
xfs_file_splice_read(
	struct file		*infilp,
	loff_t			*ppos,
	struct pipe_inode_info	*pipe,
	size_t			count,
	unsigned int		flags)
{
	struct xfs_inode	*ip = XFS_I(infilp->f_mapping->host);
	int			ioflags = 0;
	ssize_t			ret;

	XFS_STATS_INC(ip->i_mount, xs_read_calls);

	if (infilp->f_mode & FMODE_NOCMTIME)
		ioflags |= XFS_IO_INVIS;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	trace_xfs_file_splice_read(ip, count, *ppos, ioflags);

	/*
	 * DAX inodes cannot use the page cache for splice, so we have to push
	 * them through the VFS IO path. This means it goes through
	 * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we
	 * cannot lock the splice operation at this level for DAX inodes.
	 */
	if (IS_DAX(VFS_I(ip))) {
		ret = default_file_splice_read(infilp, ppos, pipe, count,
					       flags);
		goto out;
	}

	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
out:
	if (ret > 0)
		XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
	return ret;
}

/*
 * This routine is called to handle zeroing any space in the last block of the
 * file that is beyond the EOF.  We do this since the size is being increased
 * without writing anything to that block and we don't want to read the
 * garbage on the disk.
 */
STATIC int				/* error (positive) */
xfs_zero_last_block(
	struct xfs_inode	*ip,
	xfs_fsize_t		offset,
	xfs_fsize_t		isize,
	bool			*did_zeroing)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		last_fsb = XFS_B_TO_FSBT(mp, isize);
	int			zero_offset = XFS_B_FSB_OFFSET(mp, isize);
	int			zero_len;
	int			nimaps = 1;
	int			error = 0;
	struct xfs_bmbt_irec	imap;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		return error;

	ASSERT(nimaps > 0);

	/*
	 * If the block underlying isize is just a hole, then there
	 * is nothing to zero.
	 */
	if (imap.br_startblock == HOLESTARTBLOCK)
		return 0;

	zero_len = mp->m_sb.sb_blocksize - zero_offset;
	if (isize + zero_len > offset)
		zero_len = offset - isize;
	*did_zeroing = true;
	return xfs_iozero(ip, isize, zero_len);
}

/*
 * Zero any on disk space between the current EOF and the new, larger EOF.
 *
 * This handles the normal case of zeroing the remainder of the last block in
 * the file and the unusual case of zeroing blocks out beyond the size of the
 * file.  This second case only happens with fixed size extents and when the
 * system crashes before the inode size was updated but after blocks were
 * allocated.
 *
 * Expects the iolock to be held exclusive, and will take the ilock internally.
 */
int					/* error (positive) */
xfs_zero_eof(
	struct xfs_inode	*ip,
	xfs_off_t		offset,		/* starting I/O offset */
	xfs_fsize_t		isize,		/* current inode size */
	bool			*did_zeroing)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		start_zero_fsb;
	xfs_fileoff_t		end_zero_fsb;
	xfs_fileoff_t		zero_count_fsb;
	xfs_fileoff_t		last_fsb;
	xfs_fileoff_t		zero_off;
	xfs_fsize_t		zero_len;
	int			nimaps;
	int			error = 0;
	struct xfs_bmbt_irec	imap;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(offset > isize);

	trace_xfs_zero_eof(ip, isize, offset - isize);

	/*
	 * First handle zeroing the block on which isize resides.
	 *
	 * We only zero a part of that block so it is handled specially.
	 */
	if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
		error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
		if (error)
			return error;
	}

	/*
	 * Calculate the range between the new size and the old where blocks
	 * needing to be zeroed may exist.
	 *
	 * To get the block where the last byte in the file currently resides,
	 * we need to subtract one from the size and truncate back to a block
	 * boundary.  We subtract 1 in case the size is exactly on a block
	 * boundary.
	 */
	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
	if (last_fsb == end_zero_fsb) {
		/*
		 * The size was only incremented on its last block.
		 * We took care of that above, so just return.
		 */
		return 0;
	}

	ASSERT(start_zero_fsb <= end_zero_fsb);
	while (start_zero_fsb <= end_zero_fsb) {
		nimaps = 1;
		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
				       &imap, &nimaps, 0);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			return error;

		ASSERT(nimaps > 0);

		if (imap.br_state == XFS_EXT_UNWRITTEN ||
		    imap.br_startblock == HOLESTARTBLOCK) {
			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
			continue;
		}

		/*
		 * There are blocks we need to zero.
		 */
		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);

		if ((zero_off + zero_len) > offset)
			zero_len = offset - zero_off;

		error = xfs_iozero(ip, zero_off, zero_len);
		if (error)
			return error;

		*did_zeroing = true;
		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
	}

	return 0;
}

/*
 * Common pre-write limit and setup checks.
 *
 * Called with the iolock held either shared or exclusive according to
 * @iolock, and returns with it held.  Might upgrade the iolock to exclusive
 * if called for a direct write beyond i_size.
 */
STATIC ssize_t
xfs_file_aio_write_checks(
	struct kiocb		*iocb,
	struct iov_iter		*from,
	int			*iolock)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			error = 0;
	size_t			count = iov_iter_count(from);
	bool			drained_dio = false;

restart:
	error = generic_write_checks(iocb, from);
	if (error <= 0)
		return error;

	error = xfs_break_layouts(inode, iolock, true);
	if (error)
		return error;

	/* For changing security info in file_remove_privs() we need i_mutex */
	if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
		xfs_rw_iunlock(ip, *iolock);
		*iolock = XFS_IOLOCK_EXCL;
		xfs_rw_ilock(ip, *iolock);
		goto restart;
	}
	/*
	 * If the offset is beyond the size of the file, we need to zero any
	 * blocks that fall between the existing EOF and the start of this
	 * write.  If zeroing is needed and we are currently holding the
	 * iolock shared, we need to update it to exclusive which implies
	 * having to redo all checks before.
	 *
	 * We need to serialise against EOF updates that occur in IO
	 * completions here. We want to make sure that nobody is changing the
	 * size while we do this check until we have placed an IO barrier (i.e.
	 * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
	 * The spinlock effectively forms a memory barrier once we have the
	 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
	 * and hence be able to correctly determine if we need to run zeroing.
	 */
	spin_lock(&ip->i_flags_lock);
	if (iocb->ki_pos > i_size_read(inode)) {
		bool	zero = false;

		spin_unlock(&ip->i_flags_lock);
		if (!drained_dio) {
			if (*iolock == XFS_IOLOCK_SHARED) {
				xfs_rw_iunlock(ip, *iolock);
				*iolock = XFS_IOLOCK_EXCL;
				xfs_rw_ilock(ip, *iolock);
				iov_iter_reexpand(from, count);
			}
			/*
			 * We now have an IO submission barrier in place, but
			 * AIO can do EOF updates during IO completion and hence
			 * we now need to wait for all of them to drain. Non-AIO
			 * DIO will have drained before we are given the
			 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
			 * no-op.
			 */
			inode_dio_wait(inode);
			drained_dio = true;
			goto restart;
		}
		error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
		if (error)
			return error;
	} else
		spin_unlock(&ip->i_flags_lock);

	/*
	 * Updating the timestamps will grab the ilock again from
	 * xfs_fs_dirty_inode, so we have to call it after dropping the
	 * lock above.  Eventually we should look into a way to avoid
	 * the pointless lock roundtrip.
	 */
	if (likely(!(file->f_mode & FMODE_NOCMTIME))) {
		error = file_update_time(file);
		if (error)
			return error;
	}

	/*
	 * If we're writing the file then make sure to clear the setuid and
	 * setgid bits if the process is not being run by root.  This keeps
	 * people from modifying setuid and setgid binaries.
	 */
	if (!IS_NOSEC(inode))
		return file_remove_privs(file);
	return 0;
}

/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the
 * tricky-to-follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block. In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer.  To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block. This is currently implemented by
 * hitting it with a big hammer (i.e. inode_dio_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	int			unaligned_io = 0;
	int			iolock;
	size_t			count = iov_iter_count(from);
	loff_t			end;
	struct iov_iter		data;
	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
					mp->m_rtdev_targp : mp->m_ddev_targp;

	/* DIO must be aligned to device logical sector size */
	if (!IS_DAX(inode) &&
	    ((iocb->ki_pos | count) & target->bt_logical_sectormask))
		return -EINVAL;

	/* "unaligned" here means not aligned to a filesystem block */
	if ((iocb->ki_pos & mp->m_blockmask) ||
	    ((iocb->ki_pos + count) & mp->m_blockmask))
		unaligned_io = 1;

	/*
	 * We don't need to take an exclusive lock unless the page cache needs
	 * to be invalidated or unaligned IO is being executed. We don't need to
	 * consider the EOF extension case here because
	 * xfs_file_aio_write_checks() will relock the inode as necessary for
	 * EOF zeroing cases and fill out the new inode size as appropriate.
	 */
	if (unaligned_io || mapping->nrpages)
		iolock = XFS_IOLOCK_EXCL;
	else
		iolock = XFS_IOLOCK_SHARED;
	xfs_rw_ilock(ip, iolock);

	/*
	 * Recheck if there are cached pages that need invalidate after we got
	 * the iolock to protect against other threads adding new pages while
	 * we were waiting for the iolock.
	 */
	if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
		xfs_rw_iunlock(ip, iolock);
		iolock = XFS_IOLOCK_EXCL;
		xfs_rw_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;
	count = iov_iter_count(from);
	end = iocb->ki_pos + count - 1;

	/*
	 * See xfs_file_read_iter() for why we do a full-file flush here.
	 */
	if (mapping->nrpages) {
		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
		if (ret)
			goto out;
		/*
		 * Invalidate whole pages. This can return an error if we fail
		 * to invalidate a page, but this should never happen on XFS.
		 * Warn if it does fail.
		 */
		ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
		WARN_ON_ONCE(ret);
		ret = 0;
	}

	/*
	 * If we are doing unaligned IO, wait for all other IO to drain,
	 * otherwise demote the lock if we had to flush cached pages
	 */
	if (unaligned_io)
		inode_dio_wait(inode);
	else if (iolock == XFS_IOLOCK_EXCL) {
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);

	data = *from;
	ret = mapping->a_ops->direct_IO(iocb, &data);

	/* see generic_file_direct_write() for why this is necessary */
	if (mapping->nrpages) {
		invalidate_inode_pages2_range(mapping,
					      iocb->ki_pos >> PAGE_SHIFT,
					      end >> PAGE_SHIFT);
	}

	if (ret > 0) {
		iocb->ki_pos += ret;
		iov_iter_advance(from, ret);
	}
out:
	xfs_rw_iunlock(ip, iolock);

	/*
	 * No fallback to buffered IO on errors for XFS. DAX can result in
	 * partial writes, but direct IO will either complete fully or fail.
	 */
	ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
	return ret;
}

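/*
 * Buffered writes go through the pagecache under IOLOCK_EXCL.  If the write
 * hits EDQUOT or ENOSPC it is retried after attempting to free speculative
 * EOF preallocations and, for ENOSPC, flushing dirty inodes to release
 * reserved metadata space.
 */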
STATIC ssize_t
xfs_file_buffered_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	int			enospc = 0;
	int			iolock = XFS_IOLOCK_EXCL;

	xfs_rw_ilock(ip, iolock);

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);

write_retry:
	trace_xfs_file_buffered_write(ip, iov_iter_count(from),
				      iocb->ki_pos, 0);
	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
	if (likely(ret >= 0))
		iocb->ki_pos += ret;

	/*
	 * If we hit a space limit, try to free up some lingering preallocated
	 * space before returning an error. In the case of ENOSPC, first try to
	 * write back all dirty inodes to free up some of the excess reserved
	 * metadata space. This reduces the chances that the eofblocks scan
	 * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
	 * also behaves as a filter to prevent too many eofblocks scans from
	 * running at the same time.
	 */
	if (ret == -EDQUOT && !enospc) {
		enospc = xfs_inode_free_quota_eofblocks(ip);
		if (enospc)
			goto write_retry;
	} else if (ret == -ENOSPC && !enospc) {
		struct xfs_eofblocks eofb = {0};

		enospc = 1;
		xfs_flush_inodes(ip->i_mount);
		eofb.eof_scan_owner = ip->i_ino; /* for locking */
		eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
		goto write_retry;
	}

	current->backing_dev_info = NULL;
out:
	xfs_rw_iunlock(ip, iolock);
	return ret;
}

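/*
 * ->write_iter entry point: dispatch to the direct IO path for O_DIRECT and
 * DAX inodes, otherwise to the buffered path, and handle O_(D)SYNC semantics
 * via generic_write_sync() on success.
 */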
STATIC ssize_t
xfs_file_write_iter(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	size_t			ocount = iov_iter_count(from);

	XFS_STATS_INC(ip->i_mount, xs_write_calls);

	if (ocount == 0)
		return 0;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
		ret = xfs_file_dio_aio_write(iocb, from);
	else
		ret = xfs_file_buffered_aio_write(iocb, from);

	if (ret > 0) {
		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);

		/* Handle various SYNC-type writes */
		ret = generic_write_sync(iocb, ret);
	}
	return ret;
}

#define	XFS_FALLOC_FL_SUPPORTED						\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |	\
		 FALLOC_FL_INSERT_RANGE)

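/*
 * Handle fallocate() requests: preallocate, punch, zero, collapse or insert
 * space depending on the mode.  pNFS layouts are broken first, and both the
 * IO lock and the mmap lock are held exclusively for the whole operation.
 */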
STATIC long
xfs_file_fallocate(
	struct file		*file,
	int			mode,
	loff_t			offset,
	loff_t			len)
{
	struct inode		*inode = file_inode(file);
	struct xfs_inode	*ip = XFS_I(inode);
	long			error;
	enum xfs_prealloc_flags	flags = 0;
	uint			iolock = XFS_IOLOCK_EXCL;
	loff_t			new_size = 0;
	bool			do_file_insert = 0;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;
	if (mode & ~XFS_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(inode, &iolock, false);
	if (error)
		goto out_unlock;

	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
	iolock |= XFS_MMAPLOCK_EXCL;

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		error = xfs_free_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		unsigned blksize_mask = (1 << inode->i_blkbits) - 1;

		if (offset & blksize_mask || len & blksize_mask) {
			error = -EINVAL;
			goto out_unlock;
		}

		/*
		 * There is no need to overlap collapse range with EOF,
		 * in which case it is effectively a truncate operation
		 */
		if (offset + len >= i_size_read(inode)) {
			error = -EINVAL;
			goto out_unlock;
		}

		new_size = i_size_read(inode) - len;

		error = xfs_collapse_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_INSERT_RANGE) {
		unsigned blksize_mask = (1 << inode->i_blkbits) - 1;

		new_size = i_size_read(inode) + len;
		if (offset & blksize_mask || len & blksize_mask) {
			error = -EINVAL;
			goto out_unlock;
		}

		/* check the new inode size does not wrap through zero */
		if (new_size > inode->i_sb->s_maxbytes) {
			error = -EFBIG;
			goto out_unlock;
		}

		/* Offset should be less than i_size */
		if (offset >= i_size_read(inode)) {
			error = -EINVAL;
			goto out_unlock;
		}
		do_file_insert = 1;
	} else {
		flags |= XFS_PREALLOC_SET;

		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    offset + len > i_size_read(inode)) {
			new_size = offset + len;
			error = inode_newsize_ok(inode, new_size);
			if (error)
				goto out_unlock;
		}

		if (mode & FALLOC_FL_ZERO_RANGE)
			error = xfs_zero_file_space(ip, offset, len);
		else
			error = xfs_alloc_file_space(ip, offset, len,
						     XFS_BMAPI_PREALLOC);
		if (error)
			goto out_unlock;
	}

	if (file->f_flags & O_DSYNC)
		flags |= XFS_PREALLOC_SYNC;

	error = xfs_update_prealloc_flags(ip, flags);
	if (error)
		goto out_unlock;

	/* Change file size if needed */
	if (new_size) {
		struct iattr iattr;

		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = new_size;
		error = xfs_setattr_size(ip, &iattr);
		if (error)
			goto out_unlock;
	}

	/*
	 * Perform hole insertion now that the file size has been
	 * updated so that if we crash during the operation we don't
	 * leave shifted extents past EOF and hence lose access to
	 * the data that is contained within them.
	 */
	if (do_file_insert)
		error = xfs_insert_file_space(ip, offset, len);

out_unlock:
	xfs_iunlock(ip, iolock);
	return error;
}


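/*
 * Basic checks at open time: refuse files larger than the non-LFS limit when
 * the caller did not pass O_LARGEFILE, and refuse IO on a shut-down
 * filesystem.
 */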
STATIC int
xfs_file_open(
	struct inode	*inode,
	struct file	*file)
{
	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
		return -EFBIG;
	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
		return -EIO;
	return 0;
}

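/*
 * Directory open: run the common open checks, then kick off readahead of the
 * first directory data block since the next operation is almost always a
 * read of it.
 */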
STATIC int
xfs_dir_open(
	struct inode	*inode,
	struct file	*file)
{
	struct xfs_inode *ip = XFS_I(inode);
	int		mode;
	int		error;

	error = xfs_file_open(inode, file);
	if (error)
		return error;

	/*
	 * If there are any blocks, read-ahead block 0 as we're almost
	 * certain to have the next operation be a read there.
	 */
	mode = xfs_ilock_data_map_shared(ip);
	if (ip->i_d.di_nextents > 0)
		xfs_dir3_data_readahead(ip, 0, -1);
	xfs_iunlock(ip, mode);
	return 0;
}

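/*
 * ->release is called when the last reference to this struct file is
 * dropped; defer to xfs_release() for per-inode cleanup such as trimming
 * speculative preallocation beyond EOF.
 */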
STATIC int
xfs_file_release(
	struct inode	*inode,
	struct file	*filp)
{
	return xfs_release(XFS_I(inode));
}

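/*
 * Directory readdir: hand the dir_context to xfs_readdir() along with a
 * buffer size estimate that is used to size the directory readahead window.
 */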
STATIC int
xfs_file_readdir(
	struct file	*file,
	struct dir_context *ctx)
{
	struct inode	*inode = file_inode(file);
	xfs_inode_t	*ip = XFS_I(inode);
	size_t		bufsize;

	/*
	 * The Linux API doesn't pass the total size of the buffer we read
	 * into down to the filesystem.  With the filldir concept it's not
	 * needed for correct information, but the XFS dir2 leaf code wants
	 * an estimate of the buffer size to calculate its readahead window
	 * and size the buffers used for mapping to physical blocks.
	 *
	 * Try to give it an estimate that's good enough, maybe at some
	 * point we can change the ->readdir prototype to include the
	 * buffer size.  For now we use the current glibc buffer size.
	 */
	bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);

	return xfs_readdir(ip, ctx, bufsize);
}

/*
 * This type is designed to indicate the type of offset we would like
 * to search from page cache for xfs_seek_hole_data().
 */
enum {
	HOLE_OFF = 0,
	DATA_OFF,
};

/*
 * Lookup the desired type of offset from the given page.
 *
 * On success, return true and the offset argument will point to the
 * start of the region that was found.  Otherwise this function will
 * return false and keep the offset argument unchanged.
 */
STATIC bool
xfs_lookup_buffer_offset(
	struct page		*page,
	loff_t			*offset,
	unsigned int		type)
{
	loff_t			lastoff = page_offset(page);
	bool			found = false;
	struct buffer_head	*bh, *head;

	bh = head = page_buffers(page);
	do {
		/*
		 * Unwritten extents that have data in the page
		 * cache covering them can be identified by the
		 * BH_Unwritten state flag.  Pages with multiple
		 * buffers might have a mix of holes, data and
		 * unwritten extents - any buffer with valid
		 * data in it should have BH_Uptodate flag set
		 * on it.
		 */
		if (buffer_unwritten(bh) ||
		    buffer_uptodate(bh)) {
			if (type == DATA_OFF)
				found = true;
		} else {
			if (type == HOLE_OFF)
				found = true;
		}

		if (found) {
			*offset = lastoff;
			break;
		}
		lastoff += bh->b_size;
	} while ((bh = bh->b_this_page) != head);

	return found;
}

1127/*
1128 * This routine is called to find out and return a data or hole offset
1129 * from the page cache for unwritten extents according to the desired
Eric Sandeen49c69592014-09-09 11:56:48 +10001130 * type for xfs_seek_hole_data().
Jeff Liud126d432012-08-21 17:11:57 +08001131 *
1132 * The argument offset is used to tell where we start to search from the
1133 * page cache. Map is used to figure out the end points of the range to
1134 * lookup pages.
1135 *
1136 * Return true if the desired type of offset was found, and the argument
1137 * offset is filled with that address. Otherwise, return false and keep
1138 * offset unchanged.
1139 */
1140STATIC bool
1141xfs_find_get_desired_pgoff(
1142 struct inode *inode,
1143 struct xfs_bmbt_irec *map,
1144 unsigned int type,
1145 loff_t *offset)
1146{
1147 struct xfs_inode *ip = XFS_I(inode);
1148 struct xfs_mount *mp = ip->i_mount;
1149 struct pagevec pvec;
1150 pgoff_t index;
1151 pgoff_t end;
1152 loff_t endoff;
1153 loff_t startoff = *offset;
1154 loff_t lastoff = startoff;
1155 bool found = false;
1156
1157 pagevec_init(&pvec, 0);
1158
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001159 index = startoff >> PAGE_SHIFT;
Jeff Liud126d432012-08-21 17:11:57 +08001160 endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001161 end = endoff >> PAGE_SHIFT;
Jeff Liud126d432012-08-21 17:11:57 +08001162 do {
1163 int want;
1164 unsigned nr_pages;
1165 unsigned int i;
1166
1167 want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
1168 nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
1169 want);
1170 /*
1171 * No page mapped into the given range. If we are searching for
1172 * holes and this is the first pass through the loop, the given
1173 * offset lands in a hole, so return it.
1174 *
1175 * If we have already stepped through some block buffers looking
1176 * for holes but they all contained data, the last offset has
1177 * already been updated to point at the end of the last mapped
1178 * page; if that has not reached the end of the search range,
1179 * there must be a hole between them.
1180 */
1181 if (nr_pages == 0) {
1182 /* Data search found nothing */
1183 if (type == DATA_OFF)
1184 break;
1185
1186 ASSERT(type == HOLE_OFF);
1187 if (lastoff == startoff || lastoff < endoff) {
1188 found = true;
1189 *offset = lastoff;
1190 }
1191 break;
1192 }
1193
1194 /*
1195 * At least one page was found. If this is the first pass through
1196 * the loop and the offset of the first page is greater than the
1197 * given search offset, a hole was found.
1198 */
1199 if (type == HOLE_OFF && lastoff == startoff &&
1200 lastoff < page_offset(pvec.pages[0])) {
1201 found = true;
1202 break;
1203 }
1204
1205 for (i = 0; i < nr_pages; i++) {
1206 struct page *page = pvec.pages[i];
1207 loff_t b_offset;
1208
1209 /*
1210 * At this point, the page may be truncated or
1211 * invalidated (changing page->mapping to NULL),
1212 * or even swizzled back from swapper_space to tmpfs
1213 * file mapping. However, page->index will not change
1214 * because we have a reference on the page.
1215 *
1216 * Searching is done if the page index is out of range.
1217 * If the current offset has not reached the end of
1218 * the specified search range, there must be a hole
1219 * between them.
1220 */
1221 if (page->index > end) {
1222 if (type == HOLE_OFF && lastoff < endoff) {
1223 *offset = lastoff;
1224 found = true;
1225 }
1226 goto out;
1227 }
1228
1229 lock_page(page);
1230 /*
1231 * Page truncated or invalidated (page->mapping == NULL).
1232 * We can freely skip it and proceed to check the next
1233 * page.
1234 */
1235 if (unlikely(page->mapping != inode->i_mapping)) {
1236 unlock_page(page);
1237 continue;
1238 }
1239
1240 if (!page_has_buffers(page)) {
1241 unlock_page(page);
1242 continue;
1243 }
1244
1245 found = xfs_lookup_buffer_offset(page, &b_offset, type);
1246 if (found) {
1247 /*
1248 * The found offset may be less than the start of
1249 * the search if this is the first pass through
1250 * the loop.
1251 */
1252 *offset = max_t(loff_t, startoff, b_offset);
1253 unlock_page(page);
1254 goto out;
1255 }
1256
1257 /*
1258 * Either we were searching for data and found nothing, or
1259 * we were searching for a hole and found a data buffer. In
1260 * either case the next page probably contains what we want,
1261 * so update the last offset to point at it.
1262 */
1263 lastoff = page_offset(page) + PAGE_SIZE;
1264 unlock_page(page);
1265 }
1266
1267 /*
1268 * Fewer pages were returned than we asked for, so the search
1269 * is done. A data search found nothing more in this case, but
1270 * a hole search found a hole behind the last offset.
1271 */
1272 if (nr_pages < want) {
1273 if (type == HOLE_OFF) {
1274 *offset = lastoff;
1275 found = true;
1276 }
1277 break;
1278 }
1279
1280 index = pvec.pages[i - 1]->index + 1;
1281 pagevec_release(&pvec);
1282 } while (index <= end);
1283
1284out:
1285 pagevec_release(&pvec);
1286 return found;
1287}
1288
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001289/*
1290 * The caller must lock the inode with xfs_ilock_data_map_shared();
1291 * can we craft an appropriate ASSERT?
1292 *
1293 * The end argument exists because the VFS-level lseek interface requires
1294 * that any offset past i_size return -ENXIO, but the quota code uses this
1295 * routine without maintaining i_size and wants to SEEK_DATA past i_size.
1296 */
1297loff_t
1298__xfs_seek_hole_data(
1299 struct inode *inode,
Eric Sandeen49c69592014-09-09 11:56:48 +10001300 loff_t start,
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001301 loff_t end,
Eric Sandeen49c69592014-09-09 11:56:48 +10001302 int whence)
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001303{
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001304 struct xfs_inode *ip = XFS_I(inode);
1305 struct xfs_mount *mp = ip->i_mount;
1306 loff_t uninitialized_var(offset);
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001307 xfs_fileoff_t fsbno;
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001308 xfs_filblks_t lastbno;
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001309 int error;
1310
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001311 if (start >= end) {
Dave Chinner24513372014-06-25 14:58:08 +10001312 error = -ENXIO;
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001313 goto out_error;
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001314 }
1315
Eric Sandeen49c69592014-09-09 11:56:48 +10001316 /*
1317 * Try to read extents from the first block indicated
1318 * by fsbno to the end block of the file.
1319 */
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001320 fsbno = XFS_B_TO_FSBT(mp, start);
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001321 lastbno = XFS_B_TO_FSB(mp, end);
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001322
Jeff Liub686d1f2012-08-21 17:12:18 +08001323 for (;;) {
1324 struct xfs_bmbt_irec map[2];
1325 int nmap = 2;
1326 unsigned int i;
1327
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001328 error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap,
Jeff Liub686d1f2012-08-21 17:12:18 +08001329 XFS_BMAPI_ENTIRE);
1330 if (error)
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001331 goto out_error;
Jeff Liub686d1f2012-08-21 17:12:18 +08001332
1333 /* No extents at given offset, must be beyond EOF */
1334 if (nmap == 0) {
Dave Chinner24513372014-06-25 14:58:08 +10001335 error = -ENXIO;
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001336 goto out_error;
Jeff Liub686d1f2012-08-21 17:12:18 +08001337 }
1338
1339 for (i = 0; i < nmap; i++) {
1340 offset = max_t(loff_t, start,
1341 XFS_FSB_TO_B(mp, map[i].br_startoff));
1342
Eric Sandeen49c69592014-09-09 11:56:48 +10001343 /* Landed in the hole we wanted? */
1344 if (whence == SEEK_HOLE &&
1345 map[i].br_startblock == HOLESTARTBLOCK)
1346 goto out;
1347
1348 /* Landed in the data extent we wanted? */
1349 if (whence == SEEK_DATA &&
1350 (map[i].br_startblock == DELAYSTARTBLOCK ||
1351 (map[i].br_state == XFS_EXT_NORM &&
1352 !isnullstartblock(map[i].br_startblock))))
Jeff Liub686d1f2012-08-21 17:12:18 +08001353 goto out;
1354
1355 /*
Eric Sandeen49c69592014-09-09 11:56:48 +10001356 * Landed in an unwritten extent, try to search
1357 * for hole or data from page cache.
Jeff Liub686d1f2012-08-21 17:12:18 +08001358 */
1359 if (map[i].br_state == XFS_EXT_UNWRITTEN) {
1360 if (xfs_find_get_desired_pgoff(inode, &map[i],
Eric Sandeen49c69592014-09-09 11:56:48 +10001361 whence == SEEK_HOLE ? HOLE_OFF : DATA_OFF,
1362 &offset))
Jeff Liub686d1f2012-08-21 17:12:18 +08001363 goto out;
1364 }
1365 }
1366
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001367 /*
Eric Sandeen49c69592014-09-09 11:56:48 +10001368 * We only received one extent out of the two requested. This
1369 * means we've hit EOF and didn't find what we are looking for.
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001370 */
Jeff Liub686d1f2012-08-21 17:12:18 +08001371 if (nmap == 1) {
Eric Sandeen49c69592014-09-09 11:56:48 +10001372 /*
1373 * If we were looking for a hole, set offset to
1374 * the end of the file (i.e., there is an implicit
1375 * hole at the end of any file).
1376 */
1377 if (whence == SEEK_HOLE) {
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001378 offset = end;
Eric Sandeen49c69592014-09-09 11:56:48 +10001379 break;
1380 }
1381 /*
1382 * If we were looking for data, it's nowhere to be found
1383 */
1384 ASSERT(whence == SEEK_DATA);
1385 error = -ENXIO;
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001386 goto out_error;
Jeff Liub686d1f2012-08-21 17:12:18 +08001387 }
1388
1389 ASSERT(i > 1);
1390
1391 /*
Eric Sandeen49c69592014-09-09 11:56:48 +10001392 * Nothing was found; proceed to the next round of the search
1393 * if the next read offset is not at or beyond EOF.
Jeff Liub686d1f2012-08-21 17:12:18 +08001394 */
1395 fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
1396 start = XFS_FSB_TO_B(mp, fsbno);
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001397 if (start >= end) {
Eric Sandeen49c69592014-09-09 11:56:48 +10001398 if (whence == SEEK_HOLE) {
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001399 offset = end;
Eric Sandeen49c69592014-09-09 11:56:48 +10001400 break;
1401 }
1402 ASSERT(whence == SEEK_DATA);
1403 error = -ENXIO;
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001404 goto out_error;
Jeff Liub686d1f2012-08-21 17:12:18 +08001405 }
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001406 }
1407
Jeff Liub686d1f2012-08-21 17:12:18 +08001408out:
1409 /*
Eric Sandeen49c69592014-09-09 11:56:48 +10001410 * If at this point we have found the hole we wanted, the returned
Jeff Liub686d1f2012-08-21 17:12:18 +08001411 * offset may be bigger than the file size, as it may be aligned to
Eric Sandeen49c69592014-09-09 11:56:48 +10001412 * a page boundary for unwritten extents. We need to handle this
Jeff Liub686d1f2012-08-21 17:12:18 +08001413 * case in particular.
1414 */
Eric Sandeen49c69592014-09-09 11:56:48 +10001415 if (whence == SEEK_HOLE)
Eric Sandeen8aa7d372016-02-08 11:25:16 +11001416 offset = min_t(loff_t, offset, end);
1417
1418 return offset;
1419
1420out_error:
1421 return error;
1422}
1423
1424STATIC loff_t
1425xfs_seek_hole_data(
1426 struct file *file,
1427 loff_t start,
1428 int whence)
1429{
1430 struct inode *inode = file->f_mapping->host;
1431 struct xfs_inode *ip = XFS_I(inode);
1432 struct xfs_mount *mp = ip->i_mount;
1433 uint lock;
1434 loff_t offset, end;
1435 int error = 0;
1436
1437 if (XFS_FORCED_SHUTDOWN(mp))
1438 return -EIO;
1439
1440 lock = xfs_ilock_data_map_shared(ip);
1441
1442 end = i_size_read(inode);
1443 offset = __xfs_seek_hole_data(inode, start, end, whence);
1444 if (offset < 0) {
1445 error = offset;
1446 goto out_unlock;
1447 }
1448
Jie Liu46a1c2c72013-06-25 12:02:13 +08001449 offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001450
1451out_unlock:
Christoph Hellwig01f4f322013-12-06 12:30:08 -08001452 xfs_iunlock(ip, lock);
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001453
1454 if (error)
Dave Chinner24513372014-06-25 14:58:08 +10001455 return error;
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001456 return offset;
1457}
1458
1459STATIC loff_t
1460xfs_file_llseek(
1461 struct file *file,
1462 loff_t offset,
Eric Sandeen59f9c002014-09-09 11:57:10 +10001463 int whence)
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001464{
Eric Sandeen59f9c002014-09-09 11:57:10 +10001465 switch (whence) {
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001466 case SEEK_END:
1467 case SEEK_CUR:
1468 case SEEK_SET:
Eric Sandeen59f9c002014-09-09 11:57:10 +10001469 return generic_file_llseek(file, offset, whence);
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001470 case SEEK_HOLE:
Eric Sandeen49c69592014-09-09 11:56:48 +10001471 case SEEK_DATA:
Eric Sandeen59f9c002014-09-09 11:57:10 +10001472 return xfs_seek_hole_data(file, offset, whence);
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001473 default:
1474 return -EINVAL;
1475 }
1476}
1477
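/*
 * Illustrative userspace sketch (not part of this file), showing how the
 * SEEK_HOLE/SEEK_DATA whence values dispatched by xfs_file_llseek() above
 * are typically used to walk the data regions of a sparse file.  The file
 * name is hypothetical and error handling is minimal.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int	fd = open("sparse.dat", O_RDONLY);
 *		off_t	size, data, hole;
 *
 *		if (fd < 0)
 *			return 1;
 *		size = lseek(fd, 0, SEEK_END);
 *		for (data = lseek(fd, 0, SEEK_DATA);
 *		     data >= 0 && data < size;
 *		     data = lseek(fd, hole, SEEK_DATA)) {
 *			hole = lseek(fd, data, SEEK_HOLE);
 *			printf("data: [%lld, %lld)\n",
 *			       (long long)data, (long long)hole);
 *		}
 *		close(fd);
 *		return 0;
 *	}
 */
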
Dave Chinnerde0e8c22015-02-23 21:44:19 +11001478/*
1479 * Locking for serialisation of IO during page faults. This results in a lock
1480 * ordering of:
1481 *
1482 * mmap_sem (MM)
Dave Chinner6b698ed2015-06-04 09:18:53 +10001483 * sb_start_pagefault(vfs, freeze)
Dave Chinner13ad4fe2015-11-03 12:37:02 +11001484 * i_mmaplock (XFS - truncate serialisation)
Dave Chinner6b698ed2015-06-04 09:18:53 +10001485 * page_lock (MM)
1486 * i_lock (XFS - extent map serialisation)
Dave Chinnerde0e8c22015-02-23 21:44:19 +11001487 */
Dave Chinnerde0e8c22015-02-23 21:44:19 +11001488
Dave Chinner075a9242015-02-23 21:44:54 +11001489/*
1490 * The mmap()d file has taken a write protection fault and is being made writable. We
1491 * can set the page state up correctly for a writable page, which means we can
1492 * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
1493 * mapping.
1494 */
1495STATIC int
1496xfs_filemap_page_mkwrite(
1497 struct vm_area_struct *vma,
1498 struct vm_fault *vmf)
1499{
Dave Chinner6b698ed2015-06-04 09:18:53 +10001500 struct inode *inode = file_inode(vma->vm_file);
Dave Chinnerec56b1f2015-06-04 09:18:18 +10001501 int ret;
Dave Chinner075a9242015-02-23 21:44:54 +11001502
Dave Chinner6b698ed2015-06-04 09:18:53 +10001503 trace_xfs_filemap_page_mkwrite(XFS_I(inode));
Dave Chinner075a9242015-02-23 21:44:54 +11001504
Dave Chinner6b698ed2015-06-04 09:18:53 +10001505 sb_start_pagefault(inode->i_sb);
Dave Chinnerec56b1f2015-06-04 09:18:18 +10001506 file_update_time(vma->vm_file);
Dave Chinner6b698ed2015-06-04 09:18:53 +10001507 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1508
1509 if (IS_DAX(inode)) {
Jan Kara02fbd132016-05-11 11:58:48 +02001510 ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
Dave Chinner6b698ed2015-06-04 09:18:53 +10001511 } else {
Christoph Hellwig68a9f5e2016-06-21 09:53:44 +10001512 ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
Dave Chinner6b698ed2015-06-04 09:18:53 +10001513 ret = block_page_mkwrite_return(ret);
1514 }
1515
1516 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1517 sb_end_pagefault(inode->i_sb);
1518
1519 return ret;
1520}
1521
1522STATIC int
1523xfs_filemap_fault(
1524 struct vm_area_struct *vma,
1525 struct vm_fault *vmf)
1526{
Dave Chinnerb2442c52015-07-29 11:48:00 +10001527 struct inode *inode = file_inode(vma->vm_file);
Dave Chinner6b698ed2015-06-04 09:18:53 +10001528 int ret;
1529
Dave Chinnerb2442c52015-07-29 11:48:00 +10001530 trace_xfs_filemap_fault(XFS_I(inode));
Dave Chinner6b698ed2015-06-04 09:18:53 +10001531
1532 /* DAX can shortcut the normal fault path on write faults! */
Dave Chinnerb2442c52015-07-29 11:48:00 +10001533 if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
Dave Chinner6b698ed2015-06-04 09:18:53 +10001534 return xfs_filemap_page_mkwrite(vma, vmf);
Dave Chinner075a9242015-02-23 21:44:54 +11001535
Dave Chinnerb2442c52015-07-29 11:48:00 +10001536 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1537 if (IS_DAX(inode)) {
1538 /*
1539 * we do not want to trigger unwritten extent conversion on read
1540 * faults - that is unnecessary overhead and would also require
1541 * changes to xfs_get_blocks_direct() to map unwritten extent
1542 * ioend for conversion on read-only mappings.
1543 */
Jan Kara02fbd132016-05-11 11:58:48 +02001544 ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault);
Dave Chinnerb2442c52015-07-29 11:48:00 +10001545 } else
1546 ret = filemap_fault(vma, vmf);
1547 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
Dave Chinner075a9242015-02-23 21:44:54 +11001548
Dave Chinner6b698ed2015-06-04 09:18:53 +10001549 return ret;
1550}
1551
Dave Chinner13ad4fe2015-11-03 12:37:02 +11001552/*
1553 * Similar to xfs_filemap_fault(), the DAX fault path can call into here on
1554 * both read and write faults. There is no ->pmd_mkwrite callout for huge
1555 * pages, so we have a single function here to handle both cases. @flags
1556 * carries the information on the type of fault occurring.
1558 */
Matthew Wilcoxacd76e72015-09-08 14:59:06 -07001559STATIC int
1560xfs_filemap_pmd_fault(
1561 struct vm_area_struct *vma,
1562 unsigned long addr,
1563 pmd_t *pmd,
1564 unsigned int flags)
1565{
1566 struct inode *inode = file_inode(vma->vm_file);
1567 struct xfs_inode *ip = XFS_I(inode);
1568 int ret;
1569
1570 if (!IS_DAX(inode))
1571 return VM_FAULT_FALLBACK;
1572
1573 trace_xfs_filemap_pmd_fault(ip);
1574
Dave Chinner13ad4fe2015-11-03 12:37:02 +11001575 if (flags & FAULT_FLAG_WRITE) {
1576 sb_start_pagefault(inode->i_sb);
1577 file_update_time(vma->vm_file);
1578 }
1579
Matthew Wilcoxacd76e72015-09-08 14:59:06 -07001580 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
Jan Kara02fbd132016-05-11 11:58:48 +02001581 ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault);
Matthew Wilcoxacd76e72015-09-08 14:59:06 -07001582 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
Dave Chinner13ad4fe2015-11-03 12:37:02 +11001583
1584 if (flags & FAULT_FLAG_WRITE)
1585 sb_end_pagefault(inode->i_sb);
Matthew Wilcoxacd76e72015-09-08 14:59:06 -07001586
1587 return ret;
1588}
1589
Dave Chinner3af49282015-11-03 12:37:02 +11001590/*
1591 * pfn_mkwrite was originally intended to ensure we capture time stamp
1592 * updates on write faults. In reality, it is needed to serialise against
Ross Zwisler5eb88dc2016-01-22 15:10:56 -08001593 * truncate, similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
1594 * to keep the fault serialisation barrier in place.
Dave Chinner3af49282015-11-03 12:37:02 +11001595 */
1596static int
1597xfs_filemap_pfn_mkwrite(
1598 struct vm_area_struct *vma,
1599 struct vm_fault *vmf)
1600{
1601
1602 struct inode *inode = file_inode(vma->vm_file);
1603 struct xfs_inode *ip = XFS_I(inode);
1604 int ret = VM_FAULT_NOPAGE;
1605 loff_t size;
1606
1607 trace_xfs_filemap_pfn_mkwrite(ip);
1608
1609 sb_start_pagefault(inode->i_sb);
1610 file_update_time(vma->vm_file);
1611
1612 /* check if the faulting page hasn't raced with truncate */
1613 xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
1614 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
1615 if (vmf->pgoff >= size)
1616 ret = VM_FAULT_SIGBUS;
Ross Zwisler5eb88dc2016-01-22 15:10:56 -08001617 else if (IS_DAX(inode))
1618 ret = dax_pfn_mkwrite(vma, vmf);
Dave Chinner3af49282015-11-03 12:37:02 +11001619 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
1620 sb_end_pagefault(inode->i_sb);
1621 return ret;
1622
1623}
1624
Dave Chinner6b698ed2015-06-04 09:18:53 +10001625static const struct vm_operations_struct xfs_file_vm_ops = {
1626 .fault = xfs_filemap_fault,
Matthew Wilcoxacd76e72015-09-08 14:59:06 -07001627 .pmd_fault = xfs_filemap_pmd_fault,
Dave Chinner6b698ed2015-06-04 09:18:53 +10001628 .map_pages = filemap_map_pages,
1629 .page_mkwrite = xfs_filemap_page_mkwrite,
Dave Chinner3af49282015-11-03 12:37:02 +11001630 .pfn_mkwrite = xfs_filemap_pfn_mkwrite,
Dave Chinner6b698ed2015-06-04 09:18:53 +10001631};
1632
1633STATIC int
1634xfs_file_mmap(
1635 struct file *filp,
1636 struct vm_area_struct *vma)
1637{
1638 file_accessed(filp);
1639 vma->vm_ops = &xfs_file_vm_ops;
1640 if (IS_DAX(file_inode(filp)))
Matthew Wilcoxacd76e72015-09-08 14:59:06 -07001641 vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
Dave Chinner6b698ed2015-06-04 09:18:53 +10001642 return 0;
Dave Chinner075a9242015-02-23 21:44:54 +11001643}
1644
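/*
 * Illustrative userspace sketch (not part of this file): a shared writable
 * mapping like the one below is what exercises the ->fault and
 * ->page_mkwrite handlers installed by xfs_file_mmap() above; the stores
 * into the mapping take write faults that end up in
 * xfs_filemap_page_mkwrite().  The file name is hypothetical, the file is
 * assumed to be at least one page long, and error handling is minimal.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		size_t	len = 4096;
 *		int	fd = open("data.bin", O_RDWR);
 *		char	*p;
 *
 *		if (fd < 0)
 *			return 1;
 *		p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *		if (p == MAP_FAILED)
 *			return 1;
 *		memset(p, 'x', len);
 *		msync(p, len, MS_SYNC);
 *		munmap(p, len);
 *		close(fd);
 *		return 0;
 *	}
 */
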
Arjan van de Ven4b6f5d22006-03-28 01:56:42 -08001645const struct file_operations xfs_file_operations = {
Jeff Liu3fe3e6b2012-05-10 21:29:17 +08001646 .llseek = xfs_file_llseek,
Al Virob4f5d2c2014-04-02 14:37:59 -04001647 .read_iter = xfs_file_read_iter,
Al Virobf97f3bc2014-04-03 14:20:23 -04001648 .write_iter = xfs_file_write_iter,
Nathan Scott1b895842006-03-31 13:08:59 +10001649 .splice_read = xfs_file_splice_read,
Al Viro8d020762014-04-05 04:27:08 -04001650 .splice_write = iter_file_splice_write,
Nathan Scott3562fd42006-03-14 14:00:35 +11001651 .unlocked_ioctl = xfs_file_ioctl,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001652#ifdef CONFIG_COMPAT
Nathan Scott3562fd42006-03-14 14:00:35 +11001653 .compat_ioctl = xfs_file_compat_ioctl,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654#endif
Nathan Scott3562fd42006-03-14 14:00:35 +11001655 .mmap = xfs_file_mmap,
1656 .open = xfs_file_open,
1657 .release = xfs_file_release,
1658 .fsync = xfs_file_fsync,
Christoph Hellwig2fe17c12011-01-14 13:07:43 +01001659 .fallocate = xfs_file_fallocate,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660};
1661
Arjan van de Ven4b6f5d22006-03-28 01:56:42 -08001662const struct file_operations xfs_dir_file_operations = {
Christoph Hellwigf999a5b2008-11-28 14:23:32 +11001663 .open = xfs_dir_open,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664 .read = generic_read_dir,
Al Viro3b0a3c12016-04-20 23:42:46 -04001665 .iterate_shared = xfs_file_readdir,
Al Viro59af1582008-08-24 07:24:41 -04001666 .llseek = generic_file_llseek,
Nathan Scott3562fd42006-03-14 14:00:35 +11001667 .unlocked_ioctl = xfs_file_ioctl,
Nathan Scottd3870392005-05-06 06:44:46 -07001668#ifdef CONFIG_COMPAT
Nathan Scott3562fd42006-03-14 14:00:35 +11001669 .compat_ioctl = xfs_file_compat_ioctl,
Nathan Scottd3870392005-05-06 06:44:46 -07001670#endif
Christoph Hellwig1da2f2d2011-10-02 14:25:16 +00001671 .fsync = xfs_dir_fsync,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672};