xfs: add CRC checking to dir2 data blocks
[firefly-linux-kernel-4.4.55.git] / fs / xfs / xfs_dir2_leaf.c
1 /*
2  * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_mount.h"
27 #include "xfs_da_btree.h"
28 #include "xfs_bmap_btree.h"
29 #include "xfs_dinode.h"
30 #include "xfs_inode.h"
31 #include "xfs_bmap.h"
32 #include "xfs_dir2_format.h"
33 #include "xfs_dir2_priv.h"
34 #include "xfs_error.h"
35 #include "xfs_trace.h"
36
37 /*
38  * Local function declarations.
39  */
40 #ifdef DEBUG
41 static void xfs_dir2_leaf_check(struct xfs_inode *dp, struct xfs_buf *bp);
42 #else
43 #define xfs_dir2_leaf_check(dp, bp)
44 #endif
45 static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
46                                     int *indexp, struct xfs_buf **dbpp);
47 static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp,
48                                     int first, int last);
49 static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp);
50
51 static void
52 xfs_dir2_leaf_verify(
53         struct xfs_buf          *bp,
54         __be16                  magic)
55 {
56         struct xfs_mount        *mp = bp->b_target->bt_mount;
57         struct xfs_dir2_leaf_hdr *hdr = bp->b_addr;
58         int                     block_ok = 0;
59
60         block_ok = hdr->info.magic == magic;
61         if (!block_ok) {
62                 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
63                 xfs_buf_ioerror(bp, EFSCORRUPTED);
64         }
65 }
66
67 static void
68 xfs_dir2_leaf1_read_verify(
69         struct xfs_buf  *bp)
70 {
71         xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
72 }
73
74 static void
75 xfs_dir2_leaf1_write_verify(
76         struct xfs_buf  *bp)
77 {
78         xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
79 }
80
81 void
82 xfs_dir2_leafn_read_verify(
83         struct xfs_buf  *bp)
84 {
85         xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
86 }
87
88 void
89 xfs_dir2_leafn_write_verify(
90         struct xfs_buf  *bp)
91 {
92         xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
93 }
94
95 static const struct xfs_buf_ops xfs_dir2_leaf1_buf_ops = {
96         .verify_read = xfs_dir2_leaf1_read_verify,
97         .verify_write = xfs_dir2_leaf1_write_verify,
98 };
99
100 const struct xfs_buf_ops xfs_dir2_leafn_buf_ops = {
101         .verify_read = xfs_dir2_leafn_read_verify,
102         .verify_write = xfs_dir2_leafn_write_verify,
103 };
104
105 static int
106 xfs_dir2_leaf_read(
107         struct xfs_trans        *tp,
108         struct xfs_inode        *dp,
109         xfs_dablk_t             fbno,
110         xfs_daddr_t             mappedbno,
111         struct xfs_buf          **bpp)
112 {
113         return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
114                                 XFS_DATA_FORK, &xfs_dir2_leaf1_buf_ops);
115 }
116
117 int
118 xfs_dir2_leafn_read(
119         struct xfs_trans        *tp,
120         struct xfs_inode        *dp,
121         xfs_dablk_t             fbno,
122         xfs_daddr_t             mappedbno,
123         struct xfs_buf          **bpp)
124 {
125         return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
126                                 XFS_DATA_FORK, &xfs_dir2_leafn_buf_ops);
127 }
128
129 /*
130  * Convert a block form directory to a leaf form directory.
131  */
132 int                                             /* error */
133 xfs_dir2_block_to_leaf(
134         xfs_da_args_t           *args,          /* operation arguments */
135         struct xfs_buf          *dbp)           /* input block's buffer */
136 {
137         __be16                  *bestsp;        /* leaf's bestsp entries */
138         xfs_dablk_t             blkno;          /* leaf block's bno */
139         xfs_dir2_data_hdr_t     *hdr;           /* block header */
140         xfs_dir2_leaf_entry_t   *blp;           /* block's leaf entries */
141         xfs_dir2_block_tail_t   *btp;           /* block's tail */
142         xfs_inode_t             *dp;            /* incore directory inode */
143         int                     error;          /* error return code */
144         struct xfs_buf          *lbp;           /* leaf block's buffer */
145         xfs_dir2_db_t           ldb;            /* leaf block's bno */
146         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
147         xfs_dir2_leaf_tail_t    *ltp;           /* leaf's tail */
148         xfs_mount_t             *mp;            /* filesystem mount point */
149         int                     needlog;        /* need to log block header */
150         int                     needscan;       /* need to rescan bestfree */
151         xfs_trans_t             *tp;            /* transaction pointer */
152         struct xfs_dir2_data_free *bf;
153
154         trace_xfs_dir2_block_to_leaf(args);
155
156         dp = args->dp;
157         mp = dp->i_mount;
158         tp = args->trans;
159         /*
160          * Add the leaf block to the inode.
161          * This interface will only put blocks in the leaf/node range.
162          * Since that's empty now, we'll get the root (block 0 in range).
163          */
164         if ((error = xfs_da_grow_inode(args, &blkno))) {
165                 return error;
166         }
167         ldb = xfs_dir2_da_to_db(mp, blkno);
168         ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
169         /*
170          * Initialize the leaf block, get a buffer for it.
171          */
172         if ((error = xfs_dir2_leaf_init(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC))) {
173                 return error;
174         }
175         ASSERT(lbp != NULL);
176         leaf = lbp->b_addr;
177         hdr = dbp->b_addr;
178         xfs_dir3_data_check(dp, dbp);
179         btp = xfs_dir2_block_tail_p(mp, hdr);
180         blp = xfs_dir2_block_leaf_p(btp);
181         bf = xfs_dir3_data_bestfree_p(hdr);
182         /*
183          * Set the counts in the leaf header.
184          */
185         leaf->hdr.count = cpu_to_be16(be32_to_cpu(btp->count));
186         leaf->hdr.stale = cpu_to_be16(be32_to_cpu(btp->stale));
187         /*
188          * Could compact these but I think we always do the conversion
189          * after squeezing out stale entries.
190          */
191         memcpy(leaf->ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
192         xfs_dir2_leaf_log_ents(tp, lbp, 0, be16_to_cpu(leaf->hdr.count) - 1);
193         needscan = 0;
194         needlog = 1;
195         /*
196          * Make the space formerly occupied by the leaf entries and block
197          * tail be free.
198          */
199         xfs_dir2_data_make_free(tp, dbp,
200                 (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
201                 (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
202                                        (char *)blp),
203                 &needlog, &needscan);
204         /*
205          * Fix up the block header, make it a data block.
206          */
207         dbp->b_ops = &xfs_dir3_data_buf_ops;
208         if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
209                 hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
210         else
211                 hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
212
213         if (needscan)
214                 xfs_dir2_data_freescan(mp, hdr, &needlog);
215         /*
216          * Set up leaf tail and bests table.
217          */
218         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
219         ltp->bestcount = cpu_to_be32(1);
220         bestsp = xfs_dir2_leaf_bests_p(ltp);
221         bestsp[0] =  bf[0].length;
222         /*
223          * Log the data header and leaf bests table.
224          */
225         if (needlog)
226                 xfs_dir2_data_log_header(tp, dbp);
227         xfs_dir2_leaf_check(dp, lbp);
228         xfs_dir3_data_check(dp, dbp);
229         xfs_dir2_leaf_log_bests(tp, lbp, 0, 0);
230         return 0;
231 }
232
233 STATIC void
234 xfs_dir2_leaf_find_stale(
235         struct xfs_dir2_leaf    *leaf,
236         int                     index,
237         int                     *lowstale,
238         int                     *highstale)
239 {
240         /*
241          * Find the first stale entry before our index, if any.
242          */
243         for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
244                 if (leaf->ents[*lowstale].address ==
245                     cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
246                         break;
247         }
248
249         /*
250          * Find the first stale entry at or after our index, if any.
251          * Stop if the result would require moving more entries than using
252          * lowstale.
253          */
254         for (*highstale = index;
255              *highstale < be16_to_cpu(leaf->hdr.count);
256              ++*highstale) {
257                 if (leaf->ents[*highstale].address ==
258                     cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
259                         break;
260                 if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
261                         break;
262         }
263 }
264
265 struct xfs_dir2_leaf_entry *
266 xfs_dir2_leaf_find_entry(
267         xfs_dir2_leaf_t         *leaf,          /* leaf structure */
268         int                     index,          /* leaf table position */
269         int                     compact,        /* need to compact leaves */
270         int                     lowstale,       /* index of prev stale leaf */
271         int                     highstale,      /* index of next stale leaf */
272         int                     *lfloglow,      /* low leaf logging index */
273         int                     *lfloghigh)     /* high leaf logging index */
274 {
275         if (!leaf->hdr.stale) {
276                 xfs_dir2_leaf_entry_t   *lep;   /* leaf entry table pointer */
277
278                 /*
279                  * Now we need to make room to insert the leaf entry.
280                  *
281                  * If there are no stale entries, just insert a hole at index.
282                  */
283                 lep = &leaf->ents[index];
284                 if (index < be16_to_cpu(leaf->hdr.count))
285                         memmove(lep + 1, lep,
286                                 (be16_to_cpu(leaf->hdr.count) - index) *
287                                  sizeof(*lep));
288
289                 /*
290                  * Record low and high logging indices for the leaf.
291                  */
292                 *lfloglow = index;
293                 *lfloghigh = be16_to_cpu(leaf->hdr.count);
294                 be16_add_cpu(&leaf->hdr.count, 1);
295                 return lep;
296         }
297
298         /*
299          * There are stale entries.
300          *
301          * We will use one of them for the new entry.  It's probably not at
302          * the right location, so we'll have to shift some up or down first.
303          *
304          * If we didn't compact before, we need to find the nearest stale
305          * entries before and after our insertion point.
306          */
307         if (compact == 0)
308                 xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
309
310         /*
311          * If the low one is better, use it.
312          */
313         if (lowstale >= 0 &&
314             (highstale == be16_to_cpu(leaf->hdr.count) ||
315              index - lowstale - 1 < highstale - index)) {
316                 ASSERT(index - lowstale - 1 >= 0);
317                 ASSERT(leaf->ents[lowstale].address ==
318                        cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
319
320                 /*
321                  * Copy entries up to cover the stale entry and make room
322                  * for the new entry.
323                  */
324                 if (index - lowstale - 1 > 0) {
325                         memmove(&leaf->ents[lowstale],
326                                 &leaf->ents[lowstale + 1],
327                                 (index - lowstale - 1) *
328                                 sizeof(xfs_dir2_leaf_entry_t));
329                 }
330                 *lfloglow = MIN(lowstale, *lfloglow);
331                 *lfloghigh = MAX(index - 1, *lfloghigh);
332                 be16_add_cpu(&leaf->hdr.stale, -1);
333                 return &leaf->ents[index - 1];
334         }
335
336         /*
337          * The high one is better, so use that one.
338          */
339         ASSERT(highstale - index >= 0);
340         ASSERT(leaf->ents[highstale].address ==
341                cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
342
343         /*
344          * Copy entries down to cover the stale entry and make room for the
345          * new entry.
346          */
347         if (highstale - index > 0) {
348                 memmove(&leaf->ents[index + 1],
349                         &leaf->ents[index],
350                         (highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
351         }
352         *lfloglow = MIN(index, *lfloglow);
353         *lfloghigh = MAX(highstale, *lfloghigh);
354         be16_add_cpu(&leaf->hdr.stale, -1);
355         return &leaf->ents[index];
356 }
357
358 /*
359  * Add an entry to a leaf form directory.
360  */
361 int                                             /* error */
362 xfs_dir2_leaf_addname(
363         xfs_da_args_t           *args)          /* operation arguments */
364 {
365         __be16                  *bestsp;        /* freespace table in leaf */
366         int                     compact;        /* need to compact leaves */
367         xfs_dir2_data_hdr_t     *hdr;           /* data block header */
368         struct xfs_buf          *dbp;           /* data block buffer */
369         xfs_dir2_data_entry_t   *dep;           /* data block entry */
370         xfs_inode_t             *dp;            /* incore directory inode */
371         xfs_dir2_data_unused_t  *dup;           /* data unused entry */
372         int                     error;          /* error return value */
373         int                     grown;          /* allocated new data block */
374         int                     highstale;      /* index of next stale leaf */
375         int                     i;              /* temporary, index */
376         int                     index;          /* leaf table position */
377         struct xfs_buf          *lbp;           /* leaf's buffer */
378         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
379         int                     length;         /* length of new entry */
380         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry table pointer */
381         int                     lfloglow;       /* low leaf logging index */
382         int                     lfloghigh;      /* high leaf logging index */
383         int                     lowstale;       /* index of prev stale leaf */
384         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail pointer */
385         xfs_mount_t             *mp;            /* filesystem mount point */
386         int                     needbytes;      /* leaf block bytes needed */
387         int                     needlog;        /* need to log data header */
388         int                     needscan;       /* need to rescan data free */
389         __be16                  *tagp;          /* end of data entry */
390         xfs_trans_t             *tp;            /* transaction pointer */
391         xfs_dir2_db_t           use_block;      /* data block number */
392         struct xfs_dir2_data_free *bf;          /* bestfree table */
393
394         trace_xfs_dir2_leaf_addname(args);
395
396         dp = args->dp;
397         tp = args->trans;
398         mp = dp->i_mount;
399
400         error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
401         if (error)
402                 return error;
403
404         /*
405          * Look up the entry by hash value and name.
406          * We know it's not there, our caller has already done a lookup.
407          * So the index is of the entry to insert in front of.
408          * But if there are dup hash values the index is of the first of those.
409          */
410         index = xfs_dir2_leaf_search_hash(args, lbp);
411         leaf = lbp->b_addr;
412         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
413         bestsp = xfs_dir2_leaf_bests_p(ltp);
414         length = xfs_dir2_data_entsize(args->namelen);
415         /*
416          * See if there are any entries with the same hash value
417          * and space in their block for the new entry.
418          * This is good because it puts multiple same-hash value entries
419          * in a data block, improving the lookup of those entries.
420          */
421         for (use_block = -1, lep = &leaf->ents[index];
422              index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
423              index++, lep++) {
424                 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
425                         continue;
426                 i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
427                 ASSERT(i < be32_to_cpu(ltp->bestcount));
428                 ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
429                 if (be16_to_cpu(bestsp[i]) >= length) {
430                         use_block = i;
431                         break;
432                 }
433         }
434         /*
435          * Didn't find a block yet, linear search all the data blocks.
436          */
437         if (use_block == -1) {
438                 for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) {
439                         /*
440                          * Remember a block we see that's missing.
441                          */
442                         if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
443                             use_block == -1)
444                                 use_block = i;
445                         else if (be16_to_cpu(bestsp[i]) >= length) {
446                                 use_block = i;
447                                 break;
448                         }
449                 }
450         }
451         /*
452          * How many bytes do we need in the leaf block?
453          */
454         needbytes = 0;
455         if (!leaf->hdr.stale)
456                 needbytes += sizeof(xfs_dir2_leaf_entry_t);
457         if (use_block == -1)
458                 needbytes += sizeof(xfs_dir2_data_off_t);
459
460         /*
461          * Now kill use_block if it refers to a missing block, so we
462          * can use it as an indication of allocation needed.
463          */
464         if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
465                 use_block = -1;
466         /*
467          * If we don't have enough free bytes but we can make enough
468          * by compacting out stale entries, we'll do that.
469          */
470         if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
471                                 needbytes && be16_to_cpu(leaf->hdr.stale) > 1) {
472                 compact = 1;
473         }
474         /*
475          * Otherwise if we don't have enough free bytes we need to
476          * convert to node form.
477          */
478         else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(
479                                                 leaf->hdr.count)] < needbytes) {
480                 /*
481                  * Just checking or no space reservation, give up.
482                  */
483                 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
484                                                         args->total == 0) {
485                         xfs_trans_brelse(tp, lbp);
486                         return XFS_ERROR(ENOSPC);
487                 }
488                 /*
489                  * Convert to node form.
490                  */
491                 error = xfs_dir2_leaf_to_node(args, lbp);
492                 if (error)
493                         return error;
494                 /*
495                  * Then add the new entry.
496                  */
497                 return xfs_dir2_node_addname(args);
498         }
499         /*
500          * Otherwise it will fit without compaction.
501          */
502         else
503                 compact = 0;
504         /*
505          * If just checking, then it will fit unless we needed to allocate
506          * a new data block.
507          */
508         if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
509                 xfs_trans_brelse(tp, lbp);
510                 return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
511         }
512         /*
513          * If no allocations are allowed, return now before we've
514          * changed anything.
515          */
516         if (args->total == 0 && use_block == -1) {
517                 xfs_trans_brelse(tp, lbp);
518                 return XFS_ERROR(ENOSPC);
519         }
520         /*
521          * Need to compact the leaf entries, removing stale ones.
522          * Leave one stale entry behind - the one closest to our
523          * insertion index - and we'll shift that one to our insertion
524          * point later.
525          */
526         if (compact) {
527                 xfs_dir2_leaf_compact_x1(lbp, &index, &lowstale, &highstale,
528                         &lfloglow, &lfloghigh);
529         }
530         /*
531          * There are stale entries, so we'll need log-low and log-high
532          * impossibly bad values later.
533          */
534         else if (be16_to_cpu(leaf->hdr.stale)) {
535                 lfloglow = be16_to_cpu(leaf->hdr.count);
536                 lfloghigh = -1;
537         }
538         /*
539          * If there was no data block space found, we need to allocate
540          * a new one.
541          */
542         if (use_block == -1) {
543                 /*
544                  * Add the new data block.
545                  */
546                 if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
547                                 &use_block))) {
548                         xfs_trans_brelse(tp, lbp);
549                         return error;
550                 }
551                 /*
552                  * Initialize the block.
553                  */
554                 if ((error = xfs_dir3_data_init(args, use_block, &dbp))) {
555                         xfs_trans_brelse(tp, lbp);
556                         return error;
557                 }
558                 /*
559                  * If we're adding a new data block on the end we need to
560                  * extend the bests table.  Copy it up one entry.
561                  */
562                 if (use_block >= be32_to_cpu(ltp->bestcount)) {
563                         bestsp--;
564                         memmove(&bestsp[0], &bestsp[1],
565                                 be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
566                         be32_add_cpu(&ltp->bestcount, 1);
567                         xfs_dir2_leaf_log_tail(tp, lbp);
568                         xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
569                 }
570                 /*
571                  * If we're filling in a previously empty block just log it.
572                  */
573                 else
574                         xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
575                 hdr = dbp->b_addr;
576                 bf = xfs_dir3_data_bestfree_p(hdr);
577                 bestsp[use_block] = bf[0].length;
578                 grown = 1;
579         } else {
580                 /*
581                  * Already had space in some data block.
582                  * Just read that one in.
583                  */
584                 error = xfs_dir3_data_read(tp, dp,
585                                            xfs_dir2_db_to_da(mp, use_block),
586                                            -1, &dbp);
587                 if (error) {
588                         xfs_trans_brelse(tp, lbp);
589                         return error;
590                 }
591                 hdr = dbp->b_addr;
592                 bf = xfs_dir3_data_bestfree_p(hdr);
593                 grown = 0;
594         }
595         /*
596          * Point to the biggest freespace in our data block.
597          */
598         dup = (xfs_dir2_data_unused_t *)
599               ((char *)hdr + be16_to_cpu(bf[0].offset));
600         ASSERT(be16_to_cpu(dup->length) >= length);
601         needscan = needlog = 0;
602         /*
603          * Mark the initial part of our freespace in use for the new entry.
604          */
605         xfs_dir2_data_use_free(tp, dbp, dup,
606                 (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
607                 &needlog, &needscan);
608         /*
609          * Initialize our new entry (at last).
610          */
611         dep = (xfs_dir2_data_entry_t *)dup;
612         dep->inumber = cpu_to_be64(args->inumber);
613         dep->namelen = args->namelen;
614         memcpy(dep->name, args->name, dep->namelen);
615         tagp = xfs_dir2_data_entry_tag_p(dep);
616         *tagp = cpu_to_be16((char *)dep - (char *)hdr);
617         /*
618          * Need to scan fix up the bestfree table.
619          */
620         if (needscan)
621                 xfs_dir2_data_freescan(mp, hdr, &needlog);
622         /*
623          * Need to log the data block's header.
624          */
625         if (needlog)
626                 xfs_dir2_data_log_header(tp, dbp);
627         xfs_dir2_data_log_entry(tp, dbp, dep);
628         /*
629          * If the bests table needs to be changed, do it.
630          * Log the change unless we've already done that.
631          */
632         if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) {
633                 bestsp[use_block] = bf[0].length;
634                 if (!grown)
635                         xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
636         }
637
638         lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
639                                        highstale, &lfloglow, &lfloghigh);
640
641         /*
642          * Fill in the new leaf entry.
643          */
644         lep->hashval = cpu_to_be32(args->hashval);
645         lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block,
646                                 be16_to_cpu(*tagp)));
647         /*
648          * Log the leaf fields and give up the buffers.
649          */
650         xfs_dir2_leaf_log_header(tp, lbp);
651         xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
652         xfs_dir2_leaf_check(dp, lbp);
653         xfs_dir3_data_check(dp, dbp);
654         return 0;
655 }
656
#ifdef DEBUG
/*
 * Debug-only consistency check of a leaf1 block.
 *
 * Asserts that the magic is XFS_DIR2_LEAF1_MAGIC, that the entry count is
 * within xfs_dir2_max_leaf_ents(), that the entry table does not run into
 * the bests table, that hash values are in non-decreasing order, and that
 * hdr.stale equals the number of entries whose address is
 * XFS_DIR2_NULL_DATAPTR.
 */
STATIC void
xfs_dir2_leaf_check(
	struct xfs_inode	*dp,		/* incore directory inode */
	struct xfs_buf		*bp)		/* leaf's buffer */
{
	xfs_dir2_leaf_t		*leaf = bp->b_addr;
	xfs_mount_t		*mp = dp->i_mount;
	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail pointer */
	int			nents;		/* entries in the leaf */
	int			nstale = 0;	/* stale entries seen */
	int			i;

	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));

	/*
	 * This bound is not restrictive enough: it should also factor in
	 * the size of the bests table, which can be deduced from di_size.
	 */
	ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));

	/*
	 * The end of the entry table must not pass the start of the bests
	 * table.
	 */
	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
	ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <=
	       (char *)xfs_dir2_leaf_bests_p(ltp));

	/*
	 * Single pass: check each entry against its successor (when there
	 * is one) and tally the stale entries.
	 */
	nents = be16_to_cpu(leaf->hdr.count);
	for (i = 0; i < nents; i++) {
		ASSERT(i + 1 >= nents ||
		       be32_to_cpu(leaf->ents[i].hashval) <=
		       be32_to_cpu(leaf->ents[i + 1].hashval));
		if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
			nstale++;
	}
	ASSERT(be16_to_cpu(leaf->hdr.stale) == nstale);
}
#endif	/* DEBUG */
701
702 /*
703  * Compact out any stale entries in the leaf.
704  * Log the header and changed leaf entries, if any.
705  */
706 void
707 xfs_dir2_leaf_compact(
708         xfs_da_args_t   *args,          /* operation arguments */
709         struct xfs_buf  *bp)            /* leaf buffer */
710 {
711         int             from;           /* source leaf index */
712         xfs_dir2_leaf_t *leaf;          /* leaf structure */
713         int             loglow;         /* first leaf entry to log */
714         int             to;             /* target leaf index */
715
716         leaf = bp->b_addr;
717         if (!leaf->hdr.stale) {
718                 return;
719         }
720         /*
721          * Compress out the stale entries in place.
722          */
723         for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
724                 if (leaf->ents[from].address ==
725                     cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
726                         continue;
727                 /*
728                  * Only actually copy the entries that are different.
729                  */
730                 if (from > to) {
731                         if (loglow == -1)
732                                 loglow = to;
733                         leaf->ents[to] = leaf->ents[from];
734                 }
735                 to++;
736         }
737         /*
738          * Update and log the header, log the leaf entries.
739          */
740         ASSERT(be16_to_cpu(leaf->hdr.stale) == from - to);
741         be16_add_cpu(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale)));
742         leaf->hdr.stale = 0;
743         xfs_dir2_leaf_log_header(args->trans, bp);
744         if (loglow != -1)
745                 xfs_dir2_leaf_log_ents(args->trans, bp, loglow, to - 1);
746 }
747
748 /*
749  * Compact the leaf entries, removing stale ones.
750  * Leave one stale entry behind - the one closest to our
751  * insertion index - and the caller will shift that one to our insertion
752  * point later.
753  * Return new insertion index, where the remaining stale entry is,
754  * and leaf logging indices.
755  */
756 void
757 xfs_dir2_leaf_compact_x1(
758         struct xfs_buf  *bp,            /* leaf buffer */
759         int             *indexp,        /* insertion index */
760         int             *lowstalep,     /* out: stale entry before us */
761         int             *highstalep,    /* out: stale entry after us */
762         int             *lowlogp,       /* out: low log index */
763         int             *highlogp)      /* out: high log index */
764 {
765         int             from;           /* source copy index */
766         int             highstale;      /* stale entry at/after index */
767         int             index;          /* insertion index */
768         int             keepstale;      /* source index of kept stale */
769         xfs_dir2_leaf_t *leaf;          /* leaf structure */
770         int             lowstale;       /* stale entry before index */
771         int             newindex=0;     /* new insertion index */
772         int             to;             /* destination copy index */
773
774         leaf = bp->b_addr;
775         ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
776         index = *indexp;
777
778         xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
779
780         /*
781          * Pick the better of lowstale and highstale.
782          */
783         if (lowstale >= 0 &&
784             (highstale == be16_to_cpu(leaf->hdr.count) ||
785              index - lowstale <= highstale - index))
786                 keepstale = lowstale;
787         else
788                 keepstale = highstale;
789         /*
790          * Copy the entries in place, removing all the stale entries
791          * except keepstale.
792          */
793         for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
794                 /*
795                  * Notice the new value of index.
796                  */
797                 if (index == from)
798                         newindex = to;
799                 if (from != keepstale &&
800                     leaf->ents[from].address ==
801                     cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
802                         if (from == to)
803                                 *lowlogp = to;
804                         continue;
805                 }
806                 /*
807                  * Record the new keepstale value for the insertion.
808                  */
809                 if (from == keepstale)
810                         lowstale = highstale = to;
811                 /*
812                  * Copy only the entries that have moved.
813                  */
814                 if (from > to)
815                         leaf->ents[to] = leaf->ents[from];
816                 to++;
817         }
818         ASSERT(from > to);
819         /*
820          * If the insertion point was past the last entry,
821          * set the new insertion point accordingly.
822          */
823         if (index == from)
824                 newindex = to;
825         *indexp = newindex;
826         /*
827          * Adjust the leaf header values.
828          */
829         be16_add_cpu(&leaf->hdr.count, -(from - to));
830         leaf->hdr.stale = cpu_to_be16(1);
831         /*
832          * Remember the low/high stale value only in the "right"
833          * direction.
834          */
835         if (lowstale >= newindex)
836                 lowstale = -1;
837         else
838                 highstale = be16_to_cpu(leaf->hdr.count);
839         *highlogp = be16_to_cpu(leaf->hdr.count) - 1;
840         *lowstalep = lowstale;
841         *highstalep = highstale;
842 }
843
844 struct xfs_dir2_leaf_map_info {
845         xfs_extlen_t    map_blocks;     /* number of fsbs in map */
846         xfs_dablk_t     map_off;        /* last mapped file offset */
847         int             map_size;       /* total entries in *map */
848         int             map_valid;      /* valid entries in *map */
849         int             nmap;           /* mappings to ask xfs_bmapi */
850         xfs_dir2_db_t   curdb;          /* db for current block */
851         int             ra_current;     /* number of read-ahead blks */
852         int             ra_index;       /* *map index for read-ahead */
853         int             ra_offset;      /* map entry offset for ra */
854         int             ra_want;        /* readahead count wanted */
855         struct xfs_bmbt_irec map[];     /* map vector for blocks */
856 };
857
858 STATIC int
859 xfs_dir2_leaf_readbuf(
860         struct xfs_inode        *dp,
861         size_t                  bufsize,
862         struct xfs_dir2_leaf_map_info *mip,
863         xfs_dir2_off_t          *curoff,
864         struct xfs_buf          **bpp)
865 {
866         struct xfs_mount        *mp = dp->i_mount;
867         struct xfs_buf          *bp = *bpp;
868         struct xfs_bmbt_irec    *map = mip->map;
869         int                     error = 0;
870         int                     length;
871         int                     i;
872         int                     j;
873
874         /*
875          * If we have a buffer, we need to release it and
876          * take it out of the mapping.
877          */
878
879         if (bp) {
880                 xfs_trans_brelse(NULL, bp);
881                 bp = NULL;
882                 mip->map_blocks -= mp->m_dirblkfsbs;
883                 /*
884                  * Loop to get rid of the extents for the
885                  * directory block.
886                  */
887                 for (i = mp->m_dirblkfsbs; i > 0; ) {
888                         j = min_t(int, map->br_blockcount, i);
889                         map->br_blockcount -= j;
890                         map->br_startblock += j;
891                         map->br_startoff += j;
892                         /*
893                          * If mapping is done, pitch it from
894                          * the table.
895                          */
896                         if (!map->br_blockcount && --mip->map_valid)
897                                 memmove(&map[0], &map[1],
898                                         sizeof(map[0]) * mip->map_valid);
899                         i -= j;
900                 }
901         }
902
903         /*
904          * Recalculate the readahead blocks wanted.
905          */
906         mip->ra_want = howmany(bufsize + mp->m_dirblksize,
907                                mp->m_sb.sb_blocksize) - 1;
908         ASSERT(mip->ra_want >= 0);
909
910         /*
911          * If we don't have as many as we want, and we haven't
912          * run out of data blocks, get some more mappings.
913          */
914         if (1 + mip->ra_want > mip->map_blocks &&
915             mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
916                 /*
917                  * Get more bmaps, fill in after the ones
918                  * we already have in the table.
919                  */
920                 mip->nmap = mip->map_size - mip->map_valid;
921                 error = xfs_bmapi_read(dp, mip->map_off,
922                                 xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) -
923                                                                 mip->map_off,
924                                 &map[mip->map_valid], &mip->nmap, 0);
925
926                 /*
927                  * Don't know if we should ignore this or try to return an
928                  * error.  The trouble with returning errors is that readdir
929                  * will just stop without actually passing the error through.
930                  */
931                 if (error)
932                         goto out;       /* XXX */
933
934                 /*
935                  * If we got all the mappings we asked for, set the final map
936                  * offset based on the last bmap value received.  Otherwise,
937                  * we've reached the end.
938                  */
939                 if (mip->nmap == mip->map_size - mip->map_valid) {
940                         i = mip->map_valid + mip->nmap - 1;
941                         mip->map_off = map[i].br_startoff + map[i].br_blockcount;
942                 } else
943                         mip->map_off = xfs_dir2_byte_to_da(mp,
944                                                         XFS_DIR2_LEAF_OFFSET);
945
946                 /*
947                  * Look for holes in the mapping, and eliminate them.  Count up
948                  * the valid blocks.
949                  */
950                 for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
951                         if (map[i].br_startblock == HOLESTARTBLOCK) {
952                                 mip->nmap--;
953                                 length = mip->map_valid + mip->nmap - i;
954                                 if (length)
955                                         memmove(&map[i], &map[i + 1],
956                                                 sizeof(map[i]) * length);
957                         } else {
958                                 mip->map_blocks += map[i].br_blockcount;
959                                 i++;
960                         }
961                 }
962                 mip->map_valid += mip->nmap;
963         }
964
965         /*
966          * No valid mappings, so no more data blocks.
967          */
968         if (!mip->map_valid) {
969                 *curoff = xfs_dir2_da_to_byte(mp, mip->map_off);
970                 goto out;
971         }
972
973         /*
974          * Read the directory block starting at the first mapping.
975          */
976         mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
977         error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
978                         map->br_blockcount >= mp->m_dirblkfsbs ?
979                             XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp);
980
981         /*
982          * Should just skip over the data block instead of giving up.
983          */
984         if (error)
985                 goto out;       /* XXX */
986
987         /*
988          * Adjust the current amount of read-ahead: we just read a block that
989          * was previously ra.
990          */
991         if (mip->ra_current)
992                 mip->ra_current -= mp->m_dirblkfsbs;
993
994         /*
995          * Do we need more readahead?
996          */
997         for (mip->ra_index = mip->ra_offset = i = 0;
998              mip->ra_want > mip->ra_current && i < mip->map_blocks;
999              i += mp->m_dirblkfsbs) {
1000                 ASSERT(mip->ra_index < mip->map_valid);
1001                 /*
1002                  * Read-ahead a contiguous directory block.
1003                  */
1004                 if (i > mip->ra_current &&
1005                     map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
1006                         xfs_dir3_data_readahead(NULL, dp,
1007                                 map[mip->ra_index].br_startoff + mip->ra_offset,
1008                                 XFS_FSB_TO_DADDR(mp,
1009                                         map[mip->ra_index].br_startblock +
1010                                                         mip->ra_offset));
1011                         mip->ra_current = i;
1012                 }
1013
1014                 /*
1015                  * Read-ahead a non-contiguous directory block.  This doesn't
1016                  * use our mapping, but this is a very rare case.
1017                  */
1018                 else if (i > mip->ra_current) {
1019                         xfs_dir3_data_readahead(NULL, dp,
1020                                         map[mip->ra_index].br_startoff +
1021                                                         mip->ra_offset, -1);
1022                         mip->ra_current = i;
1023                 }
1024
1025                 /*
1026                  * Advance offset through the mapping table.
1027                  */
1028                 for (j = 0; j < mp->m_dirblkfsbs; j++) {
1029                         /*
1030                          * The rest of this extent but not more than a dir
1031                          * block.
1032                          */
1033                         length = min_t(int, mp->m_dirblkfsbs,
1034                                         map[mip->ra_index].br_blockcount -
1035                                                         mip->ra_offset);
1036                         j += length;
1037                         mip->ra_offset += length;
1038
1039                         /*
1040                          * Advance to the next mapping if this one is used up.
1041                          */
1042                         if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
1043                                 mip->ra_offset = 0;
1044                                 mip->ra_index++;
1045                         }
1046                 }
1047         }
1048
1049 out:
1050         *bpp = bp;
1051         return error;
1052 }
1053
1054 /*
1055  * Getdents (readdir) for leaf and node directories.
1056  * This reads the data blocks only, so is the same for both forms.
1057  */
1058 int                                             /* error */
1059 xfs_dir2_leaf_getdents(
1060         xfs_inode_t             *dp,            /* incore directory inode */
1061         void                    *dirent,
1062         size_t                  bufsize,
1063         xfs_off_t               *offset,
1064         filldir_t               filldir)
1065 {
1066         struct xfs_buf          *bp = NULL;     /* data block buffer */
1067         xfs_dir2_data_hdr_t     *hdr;           /* data block header */
1068         xfs_dir2_data_entry_t   *dep;           /* data entry */
1069         xfs_dir2_data_unused_t  *dup;           /* unused entry */
1070         int                     error = 0;      /* error return value */
1071         int                     length;         /* temporary length value */
1072         xfs_mount_t             *mp;            /* filesystem mount point */
1073         int                     byteoff;        /* offset in current block */
1074         xfs_dir2_off_t          curoff;         /* current overall offset */
1075         xfs_dir2_off_t          newoff;         /* new curoff after new blk */
1076         char                    *ptr = NULL;    /* pointer to current data */
1077         struct xfs_dir2_leaf_map_info *map_info;
1078
1079         /*
1080          * If the offset is at or past the largest allowed value,
1081          * give up right away.
1082          */
1083         if (*offset >= XFS_DIR2_MAX_DATAPTR)
1084                 return 0;
1085
1086         mp = dp->i_mount;
1087
1088         /*
1089          * Set up to bmap a number of blocks based on the caller's
1090          * buffer size, the directory block size, and the filesystem
1091          * block size.
1092          */
1093         length = howmany(bufsize + mp->m_dirblksize,
1094                                      mp->m_sb.sb_blocksize);
1095         map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
1096                                 (length * sizeof(struct xfs_bmbt_irec)),
1097                                KM_SLEEP);
1098         map_info->map_size = length;
1099
1100         /*
1101          * Inside the loop we keep the main offset value as a byte offset
1102          * in the directory file.
1103          */
1104         curoff = xfs_dir2_dataptr_to_byte(mp, *offset);
1105
1106         /*
1107          * Force this conversion through db so we truncate the offset
1108          * down to get the start of the data block.
1109          */
1110         map_info->map_off = xfs_dir2_db_to_da(mp,
1111                                               xfs_dir2_byte_to_db(mp, curoff));
1112
1113         /*
1114          * Loop over directory entries until we reach the end offset.
1115          * Get more blocks and readahead as necessary.
1116          */
1117         while (curoff < XFS_DIR2_LEAF_OFFSET) {
1118                 /*
1119                  * If we have no buffer, or we're off the end of the
1120                  * current buffer, need to get another one.
1121                  */
1122                 if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {
1123
1124                         error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info,
1125                                                       &curoff, &bp);
1126                         if (error || !map_info->map_valid)
1127                                 break;
1128
1129                         /*
1130                          * Having done a read, we need to set a new offset.
1131                          */
1132                         newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0);
1133                         /*
1134                          * Start of the current block.
1135                          */
1136                         if (curoff < newoff)
1137                                 curoff = newoff;
1138                         /*
1139                          * Make sure we're in the right block.
1140                          */
1141                         else if (curoff > newoff)
1142                                 ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
1143                                        map_info->curdb);
1144                         hdr = bp->b_addr;
1145                         xfs_dir3_data_check(dp, bp);
1146                         /*
1147                          * Find our position in the block.
1148                          */
1149                         ptr = (char *)xfs_dir3_data_entry_p(hdr);
1150                         byteoff = xfs_dir2_byte_to_off(mp, curoff);
1151                         /*
1152                          * Skip past the header.
1153                          */
1154                         if (byteoff == 0)
1155                                 curoff += xfs_dir3_data_entry_offset(hdr);
1156                         /*
1157                          * Skip past entries until we reach our offset.
1158                          */
1159                         else {
1160                                 while ((char *)ptr - (char *)hdr < byteoff) {
1161                                         dup = (xfs_dir2_data_unused_t *)ptr;
1162
1163                                         if (be16_to_cpu(dup->freetag)
1164                                                   == XFS_DIR2_DATA_FREE_TAG) {
1165
1166                                                 length = be16_to_cpu(dup->length);
1167                                                 ptr += length;
1168                                                 continue;
1169                                         }
1170                                         dep = (xfs_dir2_data_entry_t *)ptr;
1171                                         length =
1172                                            xfs_dir2_data_entsize(dep->namelen);
1173                                         ptr += length;
1174                                 }
1175                                 /*
1176                                  * Now set our real offset.
1177                                  */
1178                                 curoff =
1179                                         xfs_dir2_db_off_to_byte(mp,
1180                                             xfs_dir2_byte_to_db(mp, curoff),
1181                                             (char *)ptr - (char *)hdr);
1182                                 if (ptr >= (char *)hdr + mp->m_dirblksize) {
1183                                         continue;
1184                                 }
1185                         }
1186                 }
1187                 /*
1188                  * We have a pointer to an entry.
1189                  * Is it a live one?
1190                  */
1191                 dup = (xfs_dir2_data_unused_t *)ptr;
1192                 /*
1193                  * No, it's unused, skip over it.
1194                  */
1195                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
1196                         length = be16_to_cpu(dup->length);
1197                         ptr += length;
1198                         curoff += length;
1199                         continue;
1200                 }
1201
1202                 dep = (xfs_dir2_data_entry_t *)ptr;
1203                 length = xfs_dir2_data_entsize(dep->namelen);
1204
1205                 if (filldir(dirent, (char *)dep->name, dep->namelen,
1206                             xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff,
1207                             be64_to_cpu(dep->inumber), DT_UNKNOWN))
1208                         break;
1209
1210                 /*
1211                  * Advance to next entry in the block.
1212                  */
1213                 ptr += length;
1214                 curoff += length;
1215                 /* bufsize may have just been a guess; don't go negative */
1216                 bufsize = bufsize > length ? bufsize - length : 0;
1217         }
1218
1219         /*
1220          * All done.  Set output offset value to current offset.
1221          */
1222         if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
1223                 *offset = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
1224         else
1225                 *offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
1226         kmem_free(map_info);
1227         if (bp)
1228                 xfs_trans_brelse(NULL, bp);
1229         return error;
1230 }
1231
1232 /*
1233  * Initialize a new leaf block, leaf1 or leafn magic accepted.
1234  */
1235 int
1236 xfs_dir2_leaf_init(
1237         xfs_da_args_t           *args,          /* operation arguments */
1238         xfs_dir2_db_t           bno,            /* directory block number */
1239         struct xfs_buf          **bpp,          /* out: leaf buffer */
1240         int                     magic)          /* magic number for block */
1241 {
1242         struct xfs_buf          *bp;            /* leaf buffer */
1243         xfs_inode_t             *dp;            /* incore directory inode */
1244         int                     error;          /* error return code */
1245         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1246         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
1247         xfs_mount_t             *mp;            /* filesystem mount point */
1248         xfs_trans_t             *tp;            /* transaction pointer */
1249
1250         dp = args->dp;
1251         ASSERT(dp != NULL);
1252         tp = args->trans;
1253         mp = dp->i_mount;
1254         ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
1255                bno < XFS_DIR2_FREE_FIRSTDB(mp));
1256         /*
1257          * Get the buffer for the block.
1258          */
1259         error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
1260                                XFS_DATA_FORK);
1261         if (error)
1262                 return error;
1263
1264         /*
1265          * Initialize the header.
1266          */
1267         leaf = bp->b_addr;
1268         leaf->hdr.info.magic = cpu_to_be16(magic);
1269         leaf->hdr.info.forw = 0;
1270         leaf->hdr.info.back = 0;
1271         leaf->hdr.count = 0;
1272         leaf->hdr.stale = 0;
1273         xfs_dir2_leaf_log_header(tp, bp);
1274         /*
1275          * If it's a leaf-format directory initialize the tail.
1276          * In this case our caller has the real bests table to copy into
1277          * the block.
1278          */
1279         if (magic == XFS_DIR2_LEAF1_MAGIC) {
1280                 bp->b_ops = &xfs_dir2_leaf1_buf_ops;
1281                 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1282                 ltp->bestcount = 0;
1283                 xfs_dir2_leaf_log_tail(tp, bp);
1284         } else
1285                 bp->b_ops = &xfs_dir2_leafn_buf_ops;
1286         *bpp = bp;
1287         return 0;
1288 }
1289
1290 /*
1291  * Log the bests entries indicated from a leaf1 block.
1292  */
1293 static void
1294 xfs_dir2_leaf_log_bests(
1295         xfs_trans_t             *tp,            /* transaction pointer */
1296         struct xfs_buf          *bp,            /* leaf buffer */
1297         int                     first,          /* first entry to log */
1298         int                     last)           /* last entry to log */
1299 {
1300         __be16                  *firstb;        /* pointer to first entry */
1301         __be16                  *lastb;         /* pointer to last entry */
1302         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1303         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
1304
1305         leaf = bp->b_addr;
1306         ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1307         ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
1308         firstb = xfs_dir2_leaf_bests_p(ltp) + first;
1309         lastb = xfs_dir2_leaf_bests_p(ltp) + last;
1310         xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
1311                 (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
1312 }
1313
1314 /*
1315  * Log the leaf entries indicated from a leaf1 or leafn block.
1316  */
1317 void
1318 xfs_dir2_leaf_log_ents(
1319         xfs_trans_t             *tp,            /* transaction pointer */
1320         struct xfs_buf          *bp,            /* leaf buffer */
1321         int                     first,          /* first entry to log */
1322         int                     last)           /* last entry to log */
1323 {
1324         xfs_dir2_leaf_entry_t   *firstlep;      /* pointer to first entry */
1325         xfs_dir2_leaf_entry_t   *lastlep;       /* pointer to last entry */
1326         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1327
1328         leaf = bp->b_addr;
1329         ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1330                leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1331         firstlep = &leaf->ents[first];
1332         lastlep = &leaf->ents[last];
1333         xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
1334                 (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
1335 }
1336
1337 /*
1338  * Log the header of the leaf1 or leafn block.
1339  */
1340 void
1341 xfs_dir2_leaf_log_header(
1342         struct xfs_trans        *tp,
1343         struct xfs_buf          *bp)
1344 {
1345         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1346
1347         leaf = bp->b_addr;
1348         ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1349                leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1350         xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
1351                 (uint)(sizeof(leaf->hdr) - 1));
1352 }
1353
1354 /*
1355  * Log the tail of the leaf1 block.
1356  */
1357 STATIC void
1358 xfs_dir2_leaf_log_tail(
1359         struct xfs_trans        *tp,
1360         struct xfs_buf          *bp)
1361 {
1362         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1363         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
1364         xfs_mount_t             *mp;            /* filesystem mount point */
1365
1366         mp = tp->t_mountp;
1367         leaf = bp->b_addr;
1368         ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1369         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1370         xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
1371                 (uint)(mp->m_dirblksize - 1));
1372 }
1373
1374 /*
1375  * Look up the entry referred to by args in the leaf format directory.
1376  * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which
1377  * is also used by the node-format code.
1378  */
1379 int
1380 xfs_dir2_leaf_lookup(
1381         xfs_da_args_t           *args)          /* operation arguments */
1382 {
1383         struct xfs_buf          *dbp;           /* data block buffer */
1384         xfs_dir2_data_entry_t   *dep;           /* data block entry */
1385         xfs_inode_t             *dp;            /* incore directory inode */
1386         int                     error;          /* error return code */
1387         int                     index;          /* found entry index */
1388         struct xfs_buf          *lbp;           /* leaf buffer */
1389         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1390         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
1391         xfs_trans_t             *tp;            /* transaction pointer */
1392
1393         trace_xfs_dir2_leaf_lookup(args);
1394
1395         /*
1396          * Look up name in the leaf block, returning both buffers and index.
1397          */
1398         if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1399                 return error;
1400         }
1401         tp = args->trans;
1402         dp = args->dp;
1403         xfs_dir2_leaf_check(dp, lbp);
1404         leaf = lbp->b_addr;
1405         /*
1406          * Get to the leaf entry and contained data entry address.
1407          */
1408         lep = &leaf->ents[index];
1409         /*
1410          * Point to the data entry.
1411          */
1412         dep = (xfs_dir2_data_entry_t *)
1413               ((char *)dbp->b_addr +
1414                xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1415         /*
1416          * Return the found inode number & CI name if appropriate
1417          */
1418         args->inumber = be64_to_cpu(dep->inumber);
1419         error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1420         xfs_trans_brelse(tp, dbp);
1421         xfs_trans_brelse(tp, lbp);
1422         return XFS_ERROR(error);
1423 }
1424
1425 /*
1426  * Look up name/hash in the leaf block.
1427  * Fill in indexp with the found index, and dbpp with the data buffer.
1428  * If not found dbpp will be NULL, and ENOENT comes back.
1429  * lbpp will always be filled in with the leaf buffer unless there's an error.
1430  */
1431 static int                                      /* error */
1432 xfs_dir2_leaf_lookup_int(
1433         xfs_da_args_t           *args,          /* operation arguments */
1434         struct xfs_buf          **lbpp,         /* out: leaf buffer */
1435         int                     *indexp,        /* out: index in leaf block */
1436         struct xfs_buf          **dbpp)         /* out: data buffer */
1437 {
1438         xfs_dir2_db_t           curdb = -1;     /* current data block number */
1439         struct xfs_buf          *dbp = NULL;    /* data buffer */
1440         xfs_dir2_data_entry_t   *dep;           /* data entry */
1441         xfs_inode_t             *dp;            /* incore directory inode */
1442         int                     error;          /* error return code */
1443         int                     index;          /* index in leaf block */
1444         struct xfs_buf          *lbp;           /* leaf buffer */
1445         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
1446         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1447         xfs_mount_t             *mp;            /* filesystem mount point */
1448         xfs_dir2_db_t           newdb;          /* new data block number */
1449         xfs_trans_t             *tp;            /* transaction pointer */
1450         xfs_dir2_db_t           cidb = -1;      /* case match data block no. */
1451         enum xfs_dacmp          cmp;            /* name compare result */
1452
1453         dp = args->dp;
1454         tp = args->trans;
1455         mp = dp->i_mount;
1456
1457         error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
1458         if (error)
1459                 return error;
1460
1461         *lbpp = lbp;
1462         leaf = lbp->b_addr;
1463         xfs_dir2_leaf_check(dp, lbp);
1464         /*
1465          * Look for the first leaf entry with our hash value.
1466          */
1467         index = xfs_dir2_leaf_search_hash(args, lbp);
1468         /*
1469          * Loop over all the entries with the right hash value
1470          * looking to match the name.
1471          */
1472         for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
1473                                 be32_to_cpu(lep->hashval) == args->hashval;
1474                                 lep++, index++) {
1475                 /*
1476                  * Skip over stale leaf entries.
1477                  */
1478                 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
1479                         continue;
1480                 /*
1481                  * Get the new data block number.
1482                  */
1483                 newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1484                 /*
1485                  * If it's not the same as the old data block number,
1486                  * need to pitch the old one and read the new one.
1487                  */
1488                 if (newdb != curdb) {
1489                         if (dbp)
1490                                 xfs_trans_brelse(tp, dbp);
1491                         error = xfs_dir3_data_read(tp, dp,
1492                                                    xfs_dir2_db_to_da(mp, newdb),
1493                                                    -1, &dbp);
1494                         if (error) {
1495                                 xfs_trans_brelse(tp, lbp);
1496                                 return error;
1497                         }
1498                         curdb = newdb;
1499                 }
1500                 /*
1501                  * Point to the data entry.
1502                  */
1503                 dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
1504                         xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1505                 /*
1506                  * Compare name and if it's an exact match, return the index
1507                  * and buffer. If it's the first case-insensitive match, store
1508                  * the index and buffer and continue looking for an exact match.
1509                  */
1510                 cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
1511                 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
1512                         args->cmpresult = cmp;
1513                         *indexp = index;
1514                         /* case exact match: return the current buffer. */
1515                         if (cmp == XFS_CMP_EXACT) {
1516                                 *dbpp = dbp;
1517                                 return 0;
1518                         }
1519                         cidb = curdb;
1520                 }
1521         }
1522         ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1523         /*
1524          * Here, we can only be doing a lookup (not a rename or remove).
1525          * If a case-insensitive match was found earlier, re-read the
1526          * appropriate data block if required and return it.
1527          */
1528         if (args->cmpresult == XFS_CMP_CASE) {
1529                 ASSERT(cidb != -1);
1530                 if (cidb != curdb) {
1531                         xfs_trans_brelse(tp, dbp);
1532                         error = xfs_dir3_data_read(tp, dp,
1533                                                    xfs_dir2_db_to_da(mp, cidb),
1534                                                    -1, &dbp);
1535                         if (error) {
1536                                 xfs_trans_brelse(tp, lbp);
1537                                 return error;
1538                         }
1539                 }
1540                 *dbpp = dbp;
1541                 return 0;
1542         }
1543         /*
1544          * No match found, return ENOENT.
1545          */
1546         ASSERT(cidb == -1);
1547         if (dbp)
1548                 xfs_trans_brelse(tp, dbp);
1549         xfs_trans_brelse(tp, lbp);
1550         return XFS_ERROR(ENOENT);
1551 }
1552
1553 /*
1554  * Remove an entry from a leaf format directory.
1555  */
1556 int                                             /* error */
1557 xfs_dir2_leaf_removename(
1558         xfs_da_args_t           *args)          /* operation arguments */
1559 {
1560         __be16                  *bestsp;        /* leaf block best freespace */
1561         xfs_dir2_data_hdr_t     *hdr;           /* data block header */
1562         xfs_dir2_db_t           db;             /* data block number */
1563         struct xfs_buf          *dbp;           /* data block buffer */
1564         xfs_dir2_data_entry_t   *dep;           /* data entry structure */
1565         xfs_inode_t             *dp;            /* incore directory inode */
1566         int                     error;          /* error return code */
1567         xfs_dir2_db_t           i;              /* temporary data block # */
1568         int                     index;          /* index into leaf entries */
1569         struct xfs_buf          *lbp;           /* leaf buffer */
1570         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1571         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
1572         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
1573         xfs_mount_t             *mp;            /* filesystem mount point */
1574         int                     needlog;        /* need to log data header */
1575         int                     needscan;       /* need to rescan data frees */
1576         xfs_dir2_data_off_t     oldbest;        /* old value of best free */
1577         xfs_trans_t             *tp;            /* transaction pointer */
1578         struct xfs_dir2_data_free *bf;          /* bestfree table */
1579
1580         trace_xfs_dir2_leaf_removename(args);
1581
1582         /*
1583          * Lookup the leaf entry, get the leaf and data blocks read in.
1584          */
1585         if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1586                 return error;
1587         }
1588         dp = args->dp;
1589         tp = args->trans;
1590         mp = dp->i_mount;
1591         leaf = lbp->b_addr;
1592         hdr = dbp->b_addr;
1593         bf = xfs_dir3_data_bestfree_p(hdr);
1594         xfs_dir3_data_check(dp, dbp);
1595         /*
1596          * Point to the leaf entry, use that to point to the data entry.
1597          */
1598         lep = &leaf->ents[index];
1599         db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1600         dep = (xfs_dir2_data_entry_t *)
1601               ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1602         needscan = needlog = 0;
1603         oldbest = be16_to_cpu(bf[0].length);
1604         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1605         bestsp = xfs_dir2_leaf_bests_p(ltp);
1606         ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
1607         /*
1608          * Mark the former data entry unused.
1609          */
1610         xfs_dir2_data_make_free(tp, dbp,
1611                 (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
1612                 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
1613         /*
1614          * We just mark the leaf entry stale by putting a null in it.
1615          */
1616         be16_add_cpu(&leaf->hdr.stale, 1);
1617         xfs_dir2_leaf_log_header(tp, lbp);
1618         lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
1619         xfs_dir2_leaf_log_ents(tp, lbp, index, index);
1620         /*
1621          * Scan the freespace in the data block again if necessary,
1622          * log the data block header if necessary.
1623          */
1624         if (needscan)
1625                 xfs_dir2_data_freescan(mp, hdr, &needlog);
1626         if (needlog)
1627                 xfs_dir2_data_log_header(tp, dbp);
1628         /*
1629          * If the longest freespace in the data block has changed,
1630          * put the new value in the bests table and log that.
1631          */
1632         if (be16_to_cpu(bf[0].length) != oldbest) {
1633                 bestsp[db] = bf[0].length;
1634                 xfs_dir2_leaf_log_bests(tp, lbp, db, db);
1635         }
1636         xfs_dir3_data_check(dp, dbp);
1637         /*
1638          * If the data block is now empty then get rid of the data block.
1639          */
1640         if (be16_to_cpu(bf[0].length) ==
1641                         mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr)) {
1642                 ASSERT(db != mp->m_dirdatablk);
1643                 if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1644                         /*
1645                          * Nope, can't get rid of it because it caused
1646                          * allocation of a bmap btree block to do so.
1647                          * Just go on, returning success, leaving the
1648                          * empty block in place.
1649                          */
1650                         if (error == ENOSPC && args->total == 0)
1651                                 error = 0;
1652                         xfs_dir2_leaf_check(dp, lbp);
1653                         return error;
1654                 }
1655                 dbp = NULL;
1656                 /*
1657                  * If this is the last data block then compact the
1658                  * bests table by getting rid of entries.
1659                  */
1660                 if (db == be32_to_cpu(ltp->bestcount) - 1) {
1661                         /*
1662                          * Look for the last active entry (i).
1663                          */
1664                         for (i = db - 1; i > 0; i--) {
1665                                 if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
1666                                         break;
1667                         }
1668                         /*
1669                          * Copy the table down so inactive entries at the
1670                          * end are removed.
1671                          */
1672                         memmove(&bestsp[db - i], bestsp,
1673                                 (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
1674                         be32_add_cpu(&ltp->bestcount, -(db - i));
1675                         xfs_dir2_leaf_log_tail(tp, lbp);
1676                         xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1677                 } else
1678                         bestsp[db] = cpu_to_be16(NULLDATAOFF);
1679         }
1680         /*
1681          * If the data block was not the first one, drop it.
1682          */
1683         else if (db != mp->m_dirdatablk)
1684                 dbp = NULL;
1685
1686         xfs_dir2_leaf_check(dp, lbp);
1687         /*
1688          * See if we can convert to block form.
1689          */
1690         return xfs_dir2_leaf_to_block(args, lbp, dbp);
1691 }
1692
1693 /*
1694  * Replace the inode number in a leaf format directory entry.
1695  */
1696 int                                             /* error */
1697 xfs_dir2_leaf_replace(
1698         xfs_da_args_t           *args)          /* operation arguments */
1699 {
1700         struct xfs_buf          *dbp;           /* data block buffer */
1701         xfs_dir2_data_entry_t   *dep;           /* data block entry */
1702         xfs_inode_t             *dp;            /* incore directory inode */
1703         int                     error;          /* error return code */
1704         int                     index;          /* index of leaf entry */
1705         struct xfs_buf          *lbp;           /* leaf buffer */
1706         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1707         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
1708         xfs_trans_t             *tp;            /* transaction pointer */
1709
1710         trace_xfs_dir2_leaf_replace(args);
1711
1712         /*
1713          * Look up the entry.
1714          */
1715         if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1716                 return error;
1717         }
1718         dp = args->dp;
1719         leaf = lbp->b_addr;
1720         /*
1721          * Point to the leaf entry, get data address from it.
1722          */
1723         lep = &leaf->ents[index];
1724         /*
1725          * Point to the data entry.
1726          */
1727         dep = (xfs_dir2_data_entry_t *)
1728               ((char *)dbp->b_addr +
1729                xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1730         ASSERT(args->inumber != be64_to_cpu(dep->inumber));
1731         /*
1732          * Put the new inode number in, log it.
1733          */
1734         dep->inumber = cpu_to_be64(args->inumber);
1735         tp = args->trans;
1736         xfs_dir2_data_log_entry(tp, dbp, dep);
1737         xfs_dir2_leaf_check(dp, lbp);
1738         xfs_trans_brelse(tp, lbp);
1739         return 0;
1740 }
1741
1742 /*
1743  * Return index in the leaf block (lbp) which is either the first
1744  * one with this hash value, or if there are none, the insert point
1745  * for that hash value.
1746  */
1747 int                                             /* index value */
1748 xfs_dir2_leaf_search_hash(
1749         xfs_da_args_t           *args,          /* operation arguments */
1750         struct xfs_buf          *lbp)           /* leaf buffer */
1751 {
1752         xfs_dahash_t            hash=0;         /* hash from this entry */
1753         xfs_dahash_t            hashwant;       /* hash value looking for */
1754         int                     high;           /* high leaf index */
1755         int                     low;            /* low leaf index */
1756         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1757         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
1758         int                     mid=0;          /* current leaf index */
1759
1760         leaf = lbp->b_addr;
1761 #ifndef __KERNEL__
1762         if (!leaf->hdr.count)
1763                 return 0;
1764 #endif
1765         /*
1766          * Note, the table cannot be empty, so we have to go through the loop.
1767          * Binary search the leaf entries looking for our hash value.
1768          */
1769         for (lep = leaf->ents, low = 0, high = be16_to_cpu(leaf->hdr.count) - 1,
1770                 hashwant = args->hashval;
1771              low <= high; ) {
1772                 mid = (low + high) >> 1;
1773                 if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant)
1774                         break;
1775                 if (hash < hashwant)
1776                         low = mid + 1;
1777                 else
1778                         high = mid - 1;
1779         }
1780         /*
1781          * Found one, back up through all the equal hash values.
1782          */
1783         if (hash == hashwant) {
1784                 while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) {
1785                         mid--;
1786                 }
1787         }
1788         /*
1789          * Need to point to an entry higher than ours.
1790          */
1791         else if (hash < hashwant)
1792                 mid++;
1793         return mid;
1794 }
1795
1796 /*
1797  * Trim off a trailing data block.  We know it's empty since the leaf
1798  * freespace table says so.
1799  */
1800 int                                             /* error */
1801 xfs_dir2_leaf_trim_data(
1802         xfs_da_args_t           *args,          /* operation arguments */
1803         struct xfs_buf          *lbp,           /* leaf buffer */
1804         xfs_dir2_db_t           db)             /* data block number */
1805 {
1806         __be16                  *bestsp;        /* leaf bests table */
1807         struct xfs_buf          *dbp;           /* data block buffer */
1808         xfs_inode_t             *dp;            /* incore directory inode */
1809         int                     error;          /* error return value */
1810         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1811         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
1812         xfs_mount_t             *mp;            /* filesystem mount point */
1813         xfs_trans_t             *tp;            /* transaction pointer */
1814
1815         dp = args->dp;
1816         mp = dp->i_mount;
1817         tp = args->trans;
1818         /*
1819          * Read the offending data block.  We need its buffer.
1820          */
1821         error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp);
1822         if (error)
1823                 return error;
1824
1825         leaf = lbp->b_addr;
1826         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1827
1828 #ifdef DEBUG
1829 {
1830         struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
1831         struct xfs_dir2_data_free *bf = xfs_dir3_data_bestfree_p(hdr);
1832
1833         ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
1834                hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
1835         ASSERT(be16_to_cpu(bf[0].length) ==
1836                mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr));
1837         ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
1838 }
1839 #endif
1840
1841         /*
1842          * Get rid of the data block.
1843          */
1844         if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1845                 ASSERT(error != ENOSPC);
1846                 xfs_trans_brelse(tp, dbp);
1847                 return error;
1848         }
1849         /*
1850          * Eliminate the last bests entry from the table.
1851          */
1852         bestsp = xfs_dir2_leaf_bests_p(ltp);
1853         be32_add_cpu(&ltp->bestcount, -1);
1854         memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
1855         xfs_dir2_leaf_log_tail(tp, lbp);
1856         xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1857         return 0;
1858 }
1859
1860 static inline size_t
1861 xfs_dir2_leaf_size(
1862         struct xfs_dir2_leaf_hdr        *hdr,
1863         int                             counts)
1864 {
1865         int                     entries;
1866
1867         entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale);
1868         return sizeof(xfs_dir2_leaf_hdr_t) +
1869             entries * sizeof(xfs_dir2_leaf_entry_t) +
1870             counts * sizeof(xfs_dir2_data_off_t) +
1871             sizeof(xfs_dir2_leaf_tail_t);
1872 }
1873
1874 /*
1875  * Convert node form directory to leaf form directory.
1876  * The root of the node form dir needs to already be a LEAFN block.
1877  * Just return if we can't do anything.
1878  */
1879 int                                             /* error */
1880 xfs_dir2_node_to_leaf(
1881         xfs_da_state_t          *state)         /* directory operation state */
1882 {
1883         xfs_da_args_t           *args;          /* operation arguments */
1884         xfs_inode_t             *dp;            /* incore directory inode */
1885         int                     error;          /* error return code */
1886         struct xfs_buf          *fbp;           /* buffer for freespace block */
1887         xfs_fileoff_t           fo;             /* freespace file offset */
1888         xfs_dir2_free_t         *free;          /* freespace structure */
1889         struct xfs_buf          *lbp;           /* buffer for leaf block */
1890         xfs_dir2_leaf_tail_t    *ltp;           /* tail of leaf structure */
1891         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1892         xfs_mount_t             *mp;            /* filesystem mount point */
1893         int                     rval;           /* successful free trim? */
1894         xfs_trans_t             *tp;            /* transaction pointer */
1895         struct xfs_dir3_icfree_hdr freehdr;
1896
1897         /*
1898          * There's more than a leaf level in the btree, so there must
1899          * be multiple leafn blocks.  Give up.
1900          */
1901         if (state->path.active > 1)
1902                 return 0;
1903         args = state->args;
1904
1905         trace_xfs_dir2_node_to_leaf(args);
1906
1907         mp = state->mp;
1908         dp = args->dp;
1909         tp = args->trans;
1910         /*
1911          * Get the last offset in the file.
1912          */
1913         if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) {
1914                 return error;
1915         }
1916         fo -= mp->m_dirblkfsbs;
1917         /*
1918          * If there are freespace blocks other than the first one,
1919          * take this opportunity to remove trailing empty freespace blocks
1920          * that may have been left behind during no-space-reservation
1921          * operations.
1922          */
1923         while (fo > mp->m_dirfreeblk) {
1924                 if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {
1925                         return error;
1926                 }
1927                 if (rval)
1928                         fo -= mp->m_dirblkfsbs;
1929                 else
1930                         return 0;
1931         }
1932         /*
1933          * Now find the block just before the freespace block.
1934          */
1935         if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) {
1936                 return error;
1937         }
1938         /*
1939          * If it's not the single leaf block, give up.
1940          */
1941         if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
1942                 return 0;
1943         lbp = state->path.blk[0].bp;
1944         leaf = lbp->b_addr;
1945         ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1946         /*
1947          * Read the freespace block.
1948          */
1949         error = xfs_dir2_free_read(tp, dp,  mp->m_dirfreeblk, &fbp);
1950         if (error)
1951                 return error;
1952         free = fbp->b_addr;
1953         xfs_dir3_free_hdr_from_disk(&freehdr, free);
1954
1955         ASSERT(!freehdr.firstdb);
1956
1957         /*
1958          * Now see if the leafn and free data will fit in a leaf1.
1959          * If not, release the buffer and give up.
1960          */
1961         if (xfs_dir2_leaf_size(&leaf->hdr, freehdr.nvalid) > mp->m_dirblksize) {
1962                 xfs_trans_brelse(tp, fbp);
1963                 return 0;
1964         }
1965
1966         /*
1967          * If the leaf has any stale entries in it, compress them out.
1968          * The compact routine will log the header.
1969          */
1970         if (be16_to_cpu(leaf->hdr.stale))
1971                 xfs_dir2_leaf_compact(args, lbp);
1972         else
1973                 xfs_dir2_leaf_log_header(tp, lbp);
1974
1975         lbp->b_ops = &xfs_dir2_leaf1_buf_ops;
1976         leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC);
1977
1978         /*
1979          * Set up the leaf tail from the freespace block.
1980          */
1981         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1982         ltp->bestcount = cpu_to_be32(freehdr.nvalid);
1983         /*
1984          * Set up the leaf bests table.
1985          */
1986         memcpy(xfs_dir2_leaf_bests_p(ltp), xfs_dir3_free_bests_p(mp, free),
1987                 freehdr.nvalid * sizeof(xfs_dir2_data_off_t));
1988         xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1989         xfs_dir2_leaf_log_tail(tp, lbp);
1990         xfs_dir2_leaf_check(dp, lbp);
1991         /*
1992          * Get rid of the freespace block.
1993          */
1994         error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp);
1995         if (error) {
1996                 /*
1997                  * This can't fail here because it can only happen when
1998                  * punching out the middle of an extent, and this is an
1999                  * isolated block.
2000                  */
2001                 ASSERT(error != ENOSPC);
2002                 return error;
2003         }
2004         fbp = NULL;
2005         /*
2006          * Now see if we can convert the single-leaf directory
2007          * down to a block form directory.
2008          * This routine always kills the dabuf for the leaf, so
2009          * eliminate it from the path.
2010          */
2011         error = xfs_dir2_leaf_to_block(args, lbp, NULL);
2012         state->path.blk[0].bp = NULL;
2013         return error;
2014 }