fs/jffs2/gc.c

   1 /*
   2  * JFFS2 -- Journalling Flash File System, Version 2.
   3  *
   4  * Copyright © 2001-2007 Red Hat, Inc.
   5  * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
   6  *
   7  * Created by David Woodhouse <dwmw2@infradead.org>
   8  *
   9  * For licensing information, see the file 'LICENCE' in this directory.
  10  *
  11  */
  12
  13 #include <linux/kernel.h>
  14 #include <linux/mtd/mtd.h>
  15 #include <linux/slab.h>
  16 #include <linux/pagemap.h>
  17 #include <linux/crc32.h>
  18 #include <linux/compiler.h>
  19 #include <linux/stat.h>
  20 #include "nodelist.h"
  21 #include "compr.h"
  22
     /* Forward declarations of the file-local GC helpers.
      * jffs2_garbage_collect_pass() dispatches to _pristine() and _live();
      * _live() in turn dispatches to the metadata/dirent/hole/dnode helpers. */
  23 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
  24                                           struct jffs2_inode_cache *ic,
  25                                           struct jffs2_raw_node_ref *raw);
  26 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  27                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
  28 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  29                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
  30 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  31                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
  32 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  33                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
  34                                       uint32_t start, uint32_t end);
  35 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  36                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
  37                                        uint32_t start, uint32_t end);
  38 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
  39                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
  40
  41 /* Called with erase_completion_lock held.
      *
      * Choose the eraseblock to garbage collect next, unlink it from the list
      * it was found on, record it in c->gcblock and point its gc_node at the
      * block's first node.
      *
      * The list is chosen pseudo-randomly from 'n' (jiffies % 128), with
      * bad_used_list taking precedence whenever more than resv_blocks_gcbad
      * free blocks remain. If the preferred list for this 'n' is empty, the
      * remaining lists are tried in a fixed fallback order.
      *
      * The lock is dropped and retaken around jffs2_flush_wbuf_pad() when only
      * erasable_pending_wbuf_list has entries, after which selection restarts.
      *
      * Returns the chosen block, or NULL if every candidate list is empty.
      * BUG()s if the chosen block has no first_node.
      */
  42 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
  43 {
  44         struct jffs2_eraseblock *ret;
  45         struct list_head *nextlist = NULL;
             /* Sampled once; 'n' is NOT recomputed when we jump back to 'again' */
  46         int n = jiffies % 128;
  47
  48         /* Pick an eraseblock to garbage collect next. This is where we'll
  49            put the clever wear-levelling algorithms. Eventually.  */
  50         /* We possibly want to favour the dirtier blocks more when the
  51            number of free blocks is low. */
  52 again:
  53         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
  54                 jffs2_dbg(1, "Picking block from bad_used_list to GC next\n");
  55                 nextlist = &c->bad_used_list;
  56         } else if (n < 50 && !list_empty(&c->erasable_list)) {
  57                 /* Note that most of them will have gone directly to be erased.
  58                    So don't favour the erasable_list _too_ much. */
  59                 jffs2_dbg(1, "Picking block from erasable_list to GC next\n");
  60                 nextlist = &c->erasable_list;
  61         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
  62                 /* Most of the time, pick one off the very_dirty list */
  63                 jffs2_dbg(1, "Picking block from very_dirty_list to GC next\n");
  64                 nextlist = &c->very_dirty_list;
  65         } else if (n < 126 && !list_empty(&c->dirty_list)) {
  66                 jffs2_dbg(1, "Picking block from dirty_list to GC next\n");
  67                 nextlist = &c->dirty_list;
  68         } else if (!list_empty(&c->clean_list)) {
                     /* Reached for the highest values of n, or when the lists
                        preferred above were empty */
  69                 jffs2_dbg(1, "Picking block from clean_list to GC next\n");
  70                 nextlist = &c->clean_list;
  71         } else if (!list_empty(&c->dirty_list)) {
                     /* From here on: fallbacks, tried regardless of n */
  72                 jffs2_dbg(1, "Picking block from dirty_list to GC next (clean_list was empty)\n");
  73
  74                 nextlist = &c->dirty_list;
  75         } else if (!list_empty(&c->very_dirty_list)) {
  76                 jffs2_dbg(1, "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n");
  77                 nextlist = &c->very_dirty_list;
  78         } else if (!list_empty(&c->erasable_list)) {
  79                 jffs2_dbg(1, "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n");
  80
  81                 nextlist = &c->erasable_list;
  82         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
  83                 /* There are blocks waiting for the wbuf sync */
  84                 jffs2_dbg(1, "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n");
                     /* Drop the spinlock across the flush, then start over */
  85                 spin_unlock(&c->erase_completion_lock);
  86                 jffs2_flush_wbuf_pad(c);
  87                 spin_lock(&c->erase_completion_lock);
  88                 goto again;
  89         } else {
  90                 /* Eep. All were empty */
  91                 jffs2_dbg(1, "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n");
  92                 return NULL;
  93         }
  94
             /* Take the head of the chosen list and make it the current gcblock */
  95         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
  96         list_del(&ret->list);
  97         c->gcblock = ret;
  98         ret->gc_node = ret->first_node;
  99         if (!ret->gc_node) {
 100                 pr_warn("Eep. ret->gc_node for block at 0x%08x is NULL\n",
 101                         ret->offset);
 102                 BUG();
 103         }
 104
 105         /* Have we accidentally picked a clean block with wasted space ? */
 106         if (ret->wasted_size) {
 107                 jffs2_dbg(1, "Converting wasted_size %08x to dirty_size\n",
 108                           ret->wasted_size);
                     /* Re-account the wasted bytes as dirty, both per-block and
                        in the filesystem-wide totals */
 109                 ret->dirty_size += ret->wasted_size;
 110                 c->wasted_size -= ret->wasted_size;
 111                 c->dirty_size += ret->wasted_size;
 112                 ret->wasted_size = 0;
 113         }
 114
 115         return ret;
 116 }
 117
 118 /* jffs2_garbage_collect_pass
 119  * Make a single attempt to progress GC. Move one node, and possibly
 120  * start erasing one eraseblock.
 121  */
 122 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
 123 {
 124         struct jffs2_inode_info *f;
 125         struct jffs2_inode_cache *ic;
 126         struct jffs2_eraseblock *jeb;
 127         struct jffs2_raw_node_ref *raw;
 128         uint32_t gcblock_dirty;
 129         int ret = 0, inum, nlink;
 130         int xattr = 0;
 131
 132         if (mutex_lock_interruptible(&c->alloc_sem))
 133                 return -EINTR;
 134
 135         for (;;) {
 136                 spin_lock(&c->erase_completion_lock);
 137                 if (!c->unchecked_size)
 138                         break;
 139
 140                 /* We can't start doing GC yet. We haven't finished checking
 141                    the node CRCs etc. Do it now. */
 142
 143                 /* checked_ino is protected by the alloc_sem */
 144                 if (c->checked_ino > c->highest_ino && xattr) {
 145                         pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
 146                                 c->unchecked_size);
 147                         jffs2_dbg_dump_block_lists_nolock(c);
 148                         spin_unlock(&c->erase_completion_lock);
 149                         mutex_unlock(&c->alloc_sem);
 150                         return -ENOSPC;
 151                 }
 152
 153                 spin_unlock(&c->erase_completion_lock);
 154
 155                 if (!xattr)
 156                         xattr = jffs2_verify_xattr(c);
 157
 158                 spin_lock(&c->inocache_lock);
 159
 160                 ic = jffs2_get_ino_cache(c, c->checked_ino++);
 161
 162                 if (!ic) {
 163                         spin_unlock(&c->inocache_lock);
 164                         continue;
 165                 }
 166
 167                 if (!ic->pino_nlink) {
 168                         jffs2_dbg(1, "Skipping check of ino #%d with nlink/pino zero\n",
 169                                   ic->ino);
 170                         spin_unlock(&c->inocache_lock);
 171                         jffs2_xattr_delete_inode(c, ic);
 172                         continue;
 173                 }
 174                 switch(ic->state) {
 175                 case INO_STATE_CHECKEDABSENT:
 176                 case INO_STATE_PRESENT:
 177                         jffs2_dbg(1, "Skipping ino #%u already checked\n",
 178                                   ic->ino);
 179                         spin_unlock(&c->inocache_lock);
 180                         continue;
 181
 182                 case INO_STATE_GC:
 183                 case INO_STATE_CHECKING:
 184                         pr_warn("Inode #%u is in state %d during CRC check phase!\n",
 185                                 ic->ino, ic->state);
 186                         spin_unlock(&c->inocache_lock);
 187                         BUG();
 188
 189                 case INO_STATE_READING:
 190                         /* We need to wait for it to finish, lest we move on
 191                            and trigger the BUG() above while we haven't yet
 192                            finished checking all its nodes */
 193                         jffs2_dbg(1, "Waiting for ino #%u to finish reading\n",
 194                                   ic->ino);
 195                         /* We need to come back again for the _same_ inode. We've
 196                          made no progress in this case, but that should be OK */
 197                         c->checked_ino--;
 198
 199                         mutex_unlock(&c->alloc_sem);
 200                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
 201                         return 0;
 202
 203                 default:
 204                         BUG();
 205
 206                 case INO_STATE_UNCHECKED:
 207                         ;
 208                 }
 209                 ic->state = INO_STATE_CHECKING;
 210                 spin_unlock(&c->inocache_lock);
 211
 212                 jffs2_dbg(1, "%s(): triggering inode scan of ino#%u\n",
 213                           __func__, ic->ino);
 214
 215                 ret = jffs2_do_crccheck_inode(c, ic);
 216                 if (ret)
 217                         pr_warn("Returned error for crccheck of ino #%u. Expect badness...\n",
 218                                 ic->ino);
 219
 220                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
 221                 mutex_unlock(&c->alloc_sem);
 222                 return ret;
 223         }
 224
 225         /* If there are any blocks which need erasing, erase them now */
 226         if (!list_empty(&c->erase_complete_list) ||
 227             !list_empty(&c->erase_pending_list)) {
 228                 spin_unlock(&c->erase_completion_lock);
 229                 mutex_unlock(&c->alloc_sem);
 230                 jffs2_dbg(1, "%s(): erasing pending blocks\n", __func__);
 231                 if (jffs2_erase_pending_blocks(c, 1))
 232                         return 0;
 233
 234                 jffs2_dbg(1, "No progress from erasing block; doing GC anyway\n");
 235                 spin_lock(&c->erase_completion_lock);
 236                 mutex_lock(&c->alloc_sem);
 237         }
 238
 239         /* First, work out which block we're garbage-collecting */
 240         jeb = c->gcblock;
 241
 242         if (!jeb)
 243                 jeb = jffs2_find_gc_block(c);
 244
 245         if (!jeb) {
 246                 /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
 247                 if (c->nr_erasing_blocks) {
 248                         spin_unlock(&c->erase_completion_lock);
 249                         mutex_unlock(&c->alloc_sem);
 250                         return -EAGAIN;
 251                 }
 252                 jffs2_dbg(1, "jffs2: Couldn't find erase block to garbage collect!\n");
 253                 spin_unlock(&c->erase_completion_lock);
 254                 mutex_unlock(&c->alloc_sem);
 255                 return -EIO;
 256         }
 257
 258         jffs2_dbg(1, "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n",
 259                   jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size);
 260         D1(if (c->nextblock)
 261            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
 262
 263         if (!jeb->used_size) {
 264                 mutex_unlock(&c->alloc_sem);
 265                 goto eraseit;
 266         }
 267
 268         raw = jeb->gc_node;
 269         gcblock_dirty = jeb->dirty_size;
 270
 271         while(ref_obsolete(raw)) {
 272                 jffs2_dbg(1, "Node at 0x%08x is obsolete... skipping\n",
 273                           ref_offset(raw));
 274                 raw = ref_next(raw);
 275                 if (unlikely(!raw)) {
 276                         pr_warn("eep. End of raw list while still supposedly nodes to GC\n");
 277                         pr_warn("erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
 278                                 jeb->offset, jeb->free_size,
 279                                 jeb->dirty_size, jeb->used_size);
 280                         jeb->gc_node = raw;
 281                         spin_unlock(&c->erase_completion_lock);
 282                         mutex_unlock(&c->alloc_sem);
 283                         BUG();
 284                 }
 285         }
 286         jeb->gc_node = raw;
 287
 288         jffs2_dbg(1, "Going to garbage collect node at 0x%08x\n",
 289                   ref_offset(raw));
 290
 291         if (!raw->next_in_ino) {
 292                 /* Inode-less node. Clean marker, snapshot or something like that */
 293                 spin_unlock(&c->erase_completion_lock);
 294                 if (ref_flags(raw) == REF_PRISTINE) {
 295                         /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
 296                         jffs2_garbage_collect_pristine(c, NULL, raw);
 297                 } else {
 298                         /* Just mark it obsolete */
 299                         jffs2_mark_node_obsolete(c, raw);
 300                 }
 301                 mutex_unlock(&c->alloc_sem);
 302                 goto eraseit_lock;
 303         }
 304
 305         ic = jffs2_raw_ref_to_ic(raw);
 306
 307 #ifdef CONFIG_JFFS2_FS_XATTR
 308         /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
 309          * We can decide whether this node is inode or xattr by ic->class.     */
 310         if (ic->class == RAWNODE_CLASS_XATTR_DATUM
 311             || ic->class == RAWNODE_CLASS_XATTR_REF) {
 312                 spin_unlock(&c->erase_completion_lock);
 313
 314                 if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
 315                         ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
 316                 } else {
 317                         ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
 318                 }
 319                 goto test_gcnode;
 320         }
 321 #endif
 322
 323         /* We need to hold the inocache. Either the erase_completion_lock or
 324            the inocache_lock are sufficient; we trade down since the inocache_lock
 325            causes less contention. */
 326         spin_lock(&c->inocache_lock);
 327
 328         spin_unlock(&c->erase_completion_lock);
 329
 330         jffs2_dbg(1, "%s(): collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n",
 331                   __func__, jeb->offset, ref_offset(raw), ref_flags(raw),
 332                   ic->ino);
 333
 334         /* Three possibilities:
 335            1. Inode is already in-core. We must iget it and do proper
 336               updating to its fragtree, etc.
 337            2. Inode is not in-core, node is REF_PRISTINE. We lock the
 338               inocache to prevent a read_inode(), copy the node intact.
 339            3. Inode is not in-core, node is not pristine. We must iget()
 340               and take the slow path.
 341         */
 342
 343         switch(ic->state) {
 344         case INO_STATE_CHECKEDABSENT:
 345                 /* It's been checked, but it's not currently in-core.
 346                    We can just copy any pristine nodes, but have
 347                    to prevent anyone else from doing read_inode() while
 348                    we're at it, so we set the state accordingly */
 349                 if (ref_flags(raw) == REF_PRISTINE)
 350                         ic->state = INO_STATE_GC;
 351                 else {
 352                         jffs2_dbg(1, "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
 353                                   ic->ino);
 354                 }
 355                 break;
 356
 357         case INO_STATE_PRESENT:
 358                 /* It's in-core. GC must iget() it. */
 359                 break;
 360
 361         case INO_STATE_UNCHECKED:
 362         case INO_STATE_CHECKING:
 363         case INO_STATE_GC:
 364                 /* Should never happen. We should have finished checking
 365                    by the time we actually start doing any GC, and since
 366                    we're holding the alloc_sem, no other garbage collection
 367                    can happen.
 368                 */
 369                 pr_crit("Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
 370                         ic->ino, ic->state);
 371                 mutex_unlock(&c->alloc_sem);
 372                 spin_unlock(&c->inocache_lock);
 373                 BUG();
 374
 375         case INO_STATE_READING:
 376                 /* Someone's currently trying to read it. We must wait for
 377                    them to finish and then go through the full iget() route
 378                    to do the GC. However, sometimes read_inode() needs to get
 379                    the alloc_sem() (for marking nodes invalid) so we must
 380                    drop the alloc_sem before sleeping. */
 381
 382                 mutex_unlock(&c->alloc_sem);
 383                 jffs2_dbg(1, "%s(): waiting for ino #%u in state %d\n",
 384                           __func__, ic->ino, ic->state);
 385                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
 386                 /* And because we dropped the alloc_sem we must start again from the
 387                    beginning. Ponder chance of livelock here -- we're returning success
 388                    without actually making any progress.
 389
 390                    Q: What are the chances that the inode is back in INO_STATE_READING
 391                    again by the time we next enter this function? And that this happens
 392                    enough times to cause a real delay?
 393
 394                    A: Small enough that I don't care :)
 395                 */
 396                 return 0;
 397         }
 398
 399         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
 400            node intact, and we don't have to muck about with the fragtree etc.
 401            because we know it's not in-core. If it _was_ in-core, we go through
 402            all the iget() crap anyway */
 403
 404         if (ic->state == INO_STATE_GC) {
 405                 spin_unlock(&c->inocache_lock);
 406
 407                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
 408
 409                 spin_lock(&c->inocache_lock);
 410                 ic->state = INO_STATE_CHECKEDABSENT;
 411                 wake_up(&c->inocache_wq);
 412
 413                 if (ret != -EBADFD) {
 414                         spin_unlock(&c->inocache_lock);
 415                         goto test_gcnode;
 416                 }
 417
 418                 /* Fall through if it wanted us to, with inocache_lock held */
 419         }
 420
 421         /* Prevent the fairly unlikely race where the gcblock is
 422            entirely obsoleted by the final close of a file which had
 423            the only valid nodes in the block, followed by erasure,
 424            followed by freeing of the ic because the erased block(s)
 425            held _all_ the nodes of that inode.... never been seen but
 426            it's vaguely possible. */
 427
 428         inum = ic->ino;
 429         nlink = ic->pino_nlink;
 430         spin_unlock(&c->inocache_lock);
 431
 432         f = jffs2_gc_fetch_inode(c, inum, !nlink);
 433         if (IS_ERR(f)) {
 434                 ret = PTR_ERR(f);
 435                 goto release_sem;
 436         }
 437         if (!f) {
 438                 ret = 0;
 439                 goto release_sem;
 440         }
 441
 442         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
 443
 444         jffs2_gc_release_inode(c, f);
 445
 446  test_gcnode:
 447         if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
 448                 /* Eep. This really should never happen. GC is broken */
 449                 pr_err("Error garbage collecting node at %08x!\n",
 450                        ref_offset(jeb->gc_node));
 451                 ret = -ENOSPC;
 452         }
 453  release_sem:
 454         mutex_unlock(&c->alloc_sem);
 455
 456  eraseit_lock:
 457         /* If we've finished this block, start it erasing */
 458         spin_lock(&c->erase_completion_lock);
 459
 460  eraseit:
 461         if (c->gcblock && !c->gcblock->used_size) {
 462                 jffs2_dbg(1, "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n",
 463                           c->gcblock->offset);
 464                 /* We're GC'ing an empty block? */
 465                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
 466                 c->gcblock = NULL;
 467                 c->nr_erasing_blocks++;
 468                 jffs2_garbage_collect_trigger(c);
 469         }
 470         spin_unlock(&c->erase_completion_lock);
 471
 472         return ret;
 473 }
 474
     /* jffs2_garbage_collect_live
      * Garbage-collect one node ('raw') belonging to an in-core inode 'f'.
      *
      * Takes f->sem for the duration. After re-checking under
      * erase_completion_lock that 'jeb' is still c->gcblock and that 'raw'
      * has not been obsoleted meanwhile, works out what kind of node 'raw'
      * is by looking it up in the inode's metadata, fragtree and dirent list,
      * and hands it to the matching helper.
      *
      * Returns 0 if there was nothing to do (block or node changed under us),
      * otherwise the return value of the helper that handled the node.
      * BUG()s if a non-obsolete node is found in none of the inode's lists.
      */
 475 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
 476                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
 477 {
 478         struct jffs2_node_frag *frag;
 479         struct jffs2_full_dnode *fn = NULL;
 480         struct jffs2_full_dirent *fd;
 481         uint32_t start = 0, end = 0, nrfrags = 0;
 482         int ret = 0;
 483
 484         mutex_lock(&f->sem);
 485
 486         /* Now we have the lock for this inode. Check that it's still the one at the head
 487            of the list. */
 488
 489         spin_lock(&c->erase_completion_lock);
 490
 491         if (c->gcblock != jeb) {
 492                 spin_unlock(&c->erase_completion_lock);
 493                 jffs2_dbg(1, "GC block is no longer gcblock. Restart\n");
 494                 goto upnout;
 495         }
 496         if (ref_obsolete(raw)) {
 497                 spin_unlock(&c->erase_completion_lock);
 498                 jffs2_dbg(1, "node to be GC'd was obsoleted in the meantime.\n");
 499                 /* They'll call again */
 500                 goto upnout;
 501         }
 502         spin_unlock(&c->erase_completion_lock);
 503
 504         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
 505         if (f->metadata && f->metadata->raw == raw) {
 506                 fn = f->metadata;
 507                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
 508                 goto upnout;
 509         }
 510
 511         /* FIXME. Read node and do lookup? */
             /* Walk the fragtree collecting every frag backed by 'raw':
                'start' is the offset of the first such frag, 'end' is one past
                the last one seen. Stop early once all of the node's frags
                have been found. */
 512         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
 513                 if (frag->node && frag->node->raw == raw) {
 514                         fn = frag->node;
 515                         end = frag->ofs + frag->size;
 516                         if (!nrfrags++)
 517                                 start = frag->ofs;
 518                         if (nrfrags == frag->node->frags)
 519                                 break; /* We've found them all */
 520                 }
 521         }
 522         if (fn) {
 523                 if (ref_flags(raw) == REF_PRISTINE) {
 524                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
 525                         if (!ret) {
 526                                 /* Urgh. Return it sensibly. */
                                     /* Go through 'fn', not 'frag': 'frag' is NULL
                                        if the walk above ran off the end of the
                                        tree without hitting the break, whereas
                                        'fn' is the matched node in either case. */
 527                                 fn->raw = f->inocache->nodes;
 528                         }
 529                         if (ret != -EBADFD)
 530                                 goto upnout;
 531                 }
 532                 /* We found a datanode. Do the GC */
 533                 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
 534                         /* It crosses a page boundary. Therefore, it must be a hole. */
 535                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
 536                 } else {
 537                         /* It could still be a hole. But we GC the page this way anyway */
 538                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
 539                 }
 540                 goto upnout;
 541         }
 542
 543         /* Wasn't a dnode. Try dirent */
 544         for (fd = f->dents; fd; fd=fd->next) {
 545                 if (fd->raw == raw)
 546                         break;
 547         }
 548
             /* A dirent with a zero ino is handled as a deletion dirent */
 549         if (fd && fd->ino) {
 550                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
 551         } else if (fd) {
 552                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
 553         } else {
 554                 pr_warn("Raw node at 0x%08x wasn't in node lists for ino #%u\n",
 555                         ref_offset(raw), f->inocache->ino);
 556                 if (ref_obsolete(raw)) {
 557                         pr_warn("But it's obsolete so we don't mind too much\n");
 558                 } else {
 559                         jffs2_dbg_dump_node(c, ref_offset(raw));
 560                         BUG();
 561                 }
 562         }
 563  upnout:
 564         mutex_unlock(&f->sem);
 565
 566         return ret;
 567 }
 568
 569 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
 570                                           struct jffs2_inode_cache *ic,
 571                                           struct jffs2_raw_node_ref *raw)
 572 {
 573         union jffs2_node_union *node;
 574         size_t retlen;
 575         int ret;
 576         uint32_t phys_ofs, alloclen;
 577         uint32_t crc, rawlen;
 578         int retried = 0;
 579
 580         jffs2_dbg(1, "Going to GC REF_PRISTINE node at 0x%08x\n",
 581                   ref_offset(raw));
 582
 583         alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
 584
 585         /* Ask for a small amount of space (or the totlen if smaller) because we
 586            don't want to force wastage of the end of a block if splitting would
 587            work. */
 588         if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
 589                 alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
 590
 591         ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
 592         /* 'rawlen' is not the exact summary size; it is only an upper estimation */
 593
 594         if (ret)
 595                 return ret;
 596
 597         if (alloclen < rawlen) {
 598                 /* Doesn't fit untouched. We'll go the old route and split it */
 599                 return -EBADFD;
 600         }
 601
 602         node = kmalloc(rawlen, GFP_KERNEL);
 603         if (!node)
 604                 return -ENOMEM;
 605
 606         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
 607         if (!ret && retlen != rawlen)
 608                 ret = -EIO;
 609         if (ret)
 610                 goto out_node;
 611
 612         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
 613         if (je32_to_cpu(node->u.hdr_crc) != crc) {
 614                 pr_warn("Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 615                         ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
 616                 goto bail;
 617         }
 618
 619         switch(je16_to_cpu(node->u.nodetype)) {
 620         case JFFS2_NODETYPE_INODE:
 621                 crc = crc32(0, node, sizeof(node->i)-8);
 622                 if (je32_to_cpu(node->i.node_crc) != crc) {
 623                         pr_warn("Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 624                                 ref_offset(raw), je32_to_cpu(node->i.node_crc),
 625                                 crc);
 626                         goto bail;
 627                 }
 628
 629                 if (je32_to_cpu(node->i.dsize)) {
 630                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
 631                         if (je32_to_cpu(node->i.data_crc) != crc) {
 632                                 pr_warn("Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 633                                         ref_offset(raw),
 634                                         je32_to_cpu(node->i.data_crc), crc);
 635                                 goto bail;
 636                         }
 637                 }
 638                 break;
 639
 640         case JFFS2_NODETYPE_DIRENT:
 641                 crc = crc32(0, node, sizeof(node->d)-8);
 642                 if (je32_to_cpu(node->d.node_crc) != crc) {
 643                         pr_warn("Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 644                                 ref_offset(raw),
 645                                 je32_to_cpu(node->d.node_crc), crc);
 646                         goto bail;
 647                 }
 648
 649                 if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
 650                         pr_warn("Name in dirent node at 0x%08x contains zeroes\n",
 651                                 ref_offset(raw));
 652                         goto bail;
 653                 }
 654
 655                 if (node->d.nsize) {
 656                         crc = crc32(0, node->d.name, node->d.nsize);
 657                         if (je32_to_cpu(node->d.name_crc) != crc) {
 658                                 pr_warn("Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 659                                         ref_offset(raw),
 660                                         je32_to_cpu(node->d.name_crc), crc);
 661                                 goto bail;
 662                         }
 663                 }
 664                 break;
 665         default:
 666                 /* If it's inode-less, we don't _know_ what it is. Just copy it intact */
 667                 if (ic) {
 668                         pr_warn("Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
 669                                 ref_offset(raw), je16_to_cpu(node->u.nodetype));
 670                         goto bail;
 671                 }
 672         }
 673
 674         /* OK, all the CRCs are good; this node can just be copied as-is. */
 675  retry:
 676         phys_ofs = write_ofs(c);
 677
 678         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
 679
 680         if (ret || (retlen != rawlen)) {
 681                 pr_notice("Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
 682                           rawlen, phys_ofs, ret, retlen);
 683                 if (retlen) {
 684                         jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
 685                 } else {
 686                         pr_notice("Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n",
 687                                   phys_ofs);
 688                 }
 689                 if (!retried) {
 690                         /* Try to reallocate space and retry */
 691                         uint32_t dummy;
 692                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
 693
 694                         retried = 1;
 695
 696                         jffs2_dbg(1, "Retrying failed write of REF_PRISTINE node.\n");
 697
 698                         jffs2_dbg_acct_sanity_check(c,jeb);
 699                         jffs2_dbg_acct_paranoia_check(c, jeb);
 700
 701                         ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
 702                                                 /* this is not the exact summary size of it,
 703                                                         it is only an upper estimation */
 704
 705                         if (!ret) {
 706                                 jffs2_dbg(1, "Allocated space at 0x%08x to retry failed write.\n",
 707                                           phys_ofs);
 708
 709                                 jffs2_dbg_acct_sanity_check(c,jeb);
 710                                 jffs2_dbg_acct_paranoia_check(c, jeb);
 711
 712                                 goto retry;
 713                         }
 714                         jffs2_dbg(1, "Failed to allocate space to retry failed write: %d!\n",
 715                                   ret);
 716                 }
 717
 718                 if (!ret)
 719                         ret = -EIO;
 720                 goto out_node;
 721         }
 722         jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
 723
 724         jffs2_mark_node_obsolete(c, raw);
 725         jffs2_dbg(1, "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n",
 726                   ref_offset(raw));
 727
 728  out_node:
 729         kfree(node);
 730         return ret;
 731  bail:
 732         ret = -EBADFD;
 733         goto out_node;
 734 }
 735
 736 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 737                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
 738 {
 739         struct jffs2_full_dnode *new_fn;
 740         struct jffs2_raw_inode ri;
 741         struct jffs2_node_frag *last_frag;
 742         union jffs2_device_node dev;
 743         char *mdata = NULL;
 744         int mdatalen = 0;
 745         uint32_t alloclen, ilen;
 746         int ret;
 747
 748         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
 749             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
 750                 /* For these, we don't actually need to read the old node */
 751                 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
 752                 mdata = (char *)&dev;
 753                 jffs2_dbg(1, "%s(): Writing %d bytes of kdev_t\n",
 754                           __func__, mdatalen);
 755         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
 756                 mdatalen = fn->size;
 757                 mdata = kmalloc(fn->size, GFP_KERNEL);
 758                 if (!mdata) {
 759                         pr_warn("kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
 760                         return -ENOMEM;
 761                 }
 762                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
 763                 if (ret) {
 764                         pr_warn("read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n",
 765                                 ret);
 766                         kfree(mdata);
 767                         return ret;
 768                 }
 769                 jffs2_dbg(1, "%s(): Writing %d bites of symlink target\n",
 770                           __func__, mdatalen);
 771
 772         }
 773
 774         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
 775                                 JFFS2_SUMMARY_INODE_SIZE);
 776         if (ret) {
 777                 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
 778                         sizeof(ri) + mdatalen, ret);
 779                 goto out;
 780         }
 781
 782         last_frag = frag_last(&f->fragtree);
 783         if (last_frag)
 784                 /* Fetch the inode length from the fragtree rather then
 785                  * from i_size since i_size may have not been updated yet */
 786                 ilen = last_frag->ofs + last_frag->size;
 787         else
 788                 ilen = JFFS2_F_I_SIZE(f);
 789
 790         memset(&ri, 0, sizeof(ri));
 791         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 792         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
 793         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
 794         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
 795
 796         ri.ino = cpu_to_je32(f->inocache->ino);
 797         ri.version = cpu_to_je32(++f->highest_version);
 798         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
 799         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
 800         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
 801         ri.isize = cpu_to_je32(ilen);
 802         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
 803         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
 804         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
 805         ri.offset = cpu_to_je32(0);
 806         ri.csize = cpu_to_je32(mdatalen);
 807         ri.dsize = cpu_to_je32(mdatalen);
 808         ri.compr = JFFS2_COMPR_NONE;
 809         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
 810         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
 811
 812         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
 813
 814         if (IS_ERR(new_fn)) {
 815                 pr_warn("Error writing new dnode: %ld\n", PTR_ERR(new_fn));
 816                 ret = PTR_ERR(new_fn);
 817                 goto out;
 818         }
 819         jffs2_mark_node_obsolete(c, fn->raw);
 820         jffs2_free_full_dnode(fn);
 821         f->metadata = new_fn;
 822  out:
 823         if (S_ISLNK(JFFS2_F_I_MODE(f)))
 824                 kfree(mdata);
 825         return ret;
 826 }
 827
 828 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 829                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
 830 {
 831         struct jffs2_full_dirent *new_fd;
 832         struct jffs2_raw_dirent rd;
 833         uint32_t alloclen;
 834         int ret;
 835
 836         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 837         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
 838         rd.nsize = strlen(fd->name);
 839         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
 840         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
 841
 842         rd.pino = cpu_to_je32(f->inocache->ino);
 843         rd.version = cpu_to_je32(++f->highest_version);
 844         rd.ino = cpu_to_je32(fd->ino);
 845         /* If the times on this inode were set by explicit utime() they can be different,
 846            so refrain from splatting them. */
 847         if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
 848                 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
 849         else
 850                 rd.mctime = cpu_to_je32(0);
 851         rd.type = fd->type;
 852         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
 853         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
 854
 855         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
 856                                 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
 857         if (ret) {
 858                 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
 859                         sizeof(rd)+rd.nsize, ret);
 860                 return ret;
 861         }
 862         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
 863
 864         if (IS_ERR(new_fd)) {
 865                 pr_warn("jffs2_write_dirent in garbage_collect_dirent failed: %ld\n",
 866                         PTR_ERR(new_fd));
 867                 return PTR_ERR(new_fd);
 868         }
 869         jffs2_add_fd_to_list(c, new_fd, &f->dents);
 870         return 0;
 871 }
 872
 873 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 874                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
 875 {
 876         struct jffs2_full_dirent **fdp = &f->dents;
 877         int found = 0;
 878
 879         /* On a medium where we can't actually mark nodes obsolete
 880            pernamently, such as NAND flash, we need to work out
 881            whether this deletion dirent is still needed to actively
 882            delete a 'real' dirent with the same name that's still
 883            somewhere else on the flash. */
 884         if (!jffs2_can_mark_obsolete(c)) {
 885                 struct jffs2_raw_dirent *rd;
 886                 struct jffs2_raw_node_ref *raw;
 887                 int ret;
 888                 size_t retlen;
 889                 int name_len = strlen(fd->name);
 890                 uint32_t name_crc = crc32(0, fd->name, name_len);
 891                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
 892
 893                 rd = kmalloc(rawlen, GFP_KERNEL);
 894                 if (!rd)
 895                         return -ENOMEM;
 896
 897                 /* Prevent the erase code from nicking the obsolete node refs while
 898                    we're looking at them. I really don't like this extra lock but
 899                    can't see any alternative. Suggestions on a postcard to... */
 900                 mutex_lock(&c->erase_free_sem);
 901
 902                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
 903
 904                         cond_resched();
 905
 906                         /* We only care about obsolete ones */
 907                         if (!(ref_obsolete(raw)))
 908                                 continue;
 909
 910                         /* Any dirent with the same name is going to have the same length... */
 911                         if (ref_totlen(c, NULL, raw) != rawlen)
 912                                 continue;
 913
 914                         /* Doesn't matter if there's one in the same erase block. We're going to
 915                            delete it too at the same time. */
 916                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
 917                                 continue;
 918
 919                         jffs2_dbg(1, "Check potential deletion dirent at %08x\n",
 920                                   ref_offset(raw));
 921
 922                         /* This is an obsolete node belonging to the same directory, and it's of the right
 923                            length. We need to take a closer look...*/
 924                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
 925                         if (ret) {
 926                                 pr_warn("%s(): Read error (%d) reading obsolete node at %08x\n",
 927                                         __func__, ret, ref_offset(raw));
 928                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
 929                                 continue;
 930                         }
 931                         if (retlen != rawlen) {
 932                                 pr_warn("%s(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
 933                                         __func__, retlen, rawlen,
 934                                         ref_offset(raw));
 935                                 continue;
 936                         }
 937
 938                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
 939                                 continue;
 940
 941                         /* If the name CRC doesn't match, skip */
 942                         if (je32_to_cpu(rd->name_crc) != name_crc)
 943                                 continue;
 944
 945                         /* If the name length doesn't match, or it's another deletion dirent, skip */
 946                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
 947                                 continue;
 948
 949                         /* OK, check the actual name now */
 950                         if (memcmp(rd->name, fd->name, name_len))
 951                                 continue;
 952
 953                         /* OK. The name really does match. There really is still an older node on
 954                            the flash which our deletion dirent obsoletes. So we have to write out
 955                            a new deletion dirent to replace it */
 956                         mutex_unlock(&c->erase_free_sem);
 957
 958                         jffs2_dbg(1, "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
 959                                   ref_offset(fd->raw), fd->name,
 960                                   ref_offset(raw), je32_to_cpu(rd->ino));
 961                         kfree(rd);
 962
 963                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
 964                 }
 965
 966                 mutex_unlock(&c->erase_free_sem);
 967                 kfree(rd);
 968         }
 969
 970         /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
 971            we should update the metadata node with those times accordingly */
 972
 973         /* No need for it any more. Just mark it obsolete and remove it from the list */
 974         while (*fdp) {
 975                 if ((*fdp) == fd) {
 976                         found = 1;
 977                         *fdp = fd->next;
 978                         break;
 979                 }
 980                 fdp = &(*fdp)->next;
 981         }
 982         if (!found) {
 983                 pr_warn("Deletion dirent \"%s\" not found in list for ino #%u\n",
 984                         fd->name, f->inocache->ino);
 985         }
 986         jffs2_mark_node_obsolete(c, fd->raw);
 987         jffs2_free_full_dirent(fd);
 988         return 0;
 989 }
 990
 991 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 992                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
 993                                       uint32_t start, uint32_t end)
 994 {
 995         struct jffs2_raw_inode ri;
 996         struct jffs2_node_frag *frag;
 997         struct jffs2_full_dnode *new_fn;
 998         uint32_t alloclen, ilen;
 999         int ret;
1000
1001         jffs2_dbg(1, "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
1002                   f->inocache->ino, start, end);
1003
1004         memset(&ri, 0, sizeof(ri));
1005
1006         if(fn->frags > 1) {
1007                 size_t readlen;
1008                 uint32_t crc;
1009                 /* It's partially obsoleted by a later write. So we have to
1010                    write it out again with the _same_ version as before */
1011                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
1012                 if (readlen != sizeof(ri) || ret) {
1013                         pr_warn("Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n",
1014                                 ret, readlen);
1015                         goto fill;
1016                 }
1017                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
1018                         pr_warn("%s(): Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
1019                                 __func__, ref_offset(fn->raw),
1020                                 je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
1021                         return -EIO;
1022                 }
1023                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
1024                         pr_warn("%s(): Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
1025                                 __func__, ref_offset(fn->raw),
1026                                 je32_to_cpu(ri.totlen), sizeof(ri));
1027                         return -EIO;
1028                 }
1029                 crc = crc32(0, &ri, sizeof(ri)-8);
1030                 if (crc != je32_to_cpu(ri.node_crc)) {
1031                         pr_warn("%s: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
1032                                 __func__, ref_offset(fn->raw),
1033                                 je32_to_cpu(ri.node_crc), crc);
1034                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
1035                         pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1036                                 start, end, f->inocache->ino);
1037                         goto fill;
1038                 }
1039                 if (ri.compr != JFFS2_COMPR_ZERO) {
1040                         pr_warn("%s(): Node 0x%08x wasn't a hole node!\n",
1041                                 __func__, ref_offset(fn->raw));
1042                         pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1043                                 start, end, f->inocache->ino);
1044                         goto fill;
1045                 }
1046         } else {
1047         fill:
1048                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1049                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1050                 ri.totlen = cpu_to_je32(sizeof(ri));
1051                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1052
1053                 ri.ino = cpu_to_je32(f->inocache->ino);
1054                 ri.version = cpu_to_je32(++f->highest_version);
1055                 ri.offset = cpu_to_je32(start);
1056                 ri.dsize = cpu_to_je32(end - start);
1057                 ri.csize = cpu_to_je32(0);
1058                 ri.compr = JFFS2_COMPR_ZERO;
1059         }
1060
1061         frag = frag_last(&f->fragtree);
1062         if (frag)
1063                 /* Fetch the inode length from the fragtree rather then
1064                  * from i_size since i_size may have not been updated yet */
1065                 ilen = frag->ofs + frag->size;
1066         else
1067                 ilen = JFFS2_F_I_SIZE(f);
1068
1069         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1070         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1071         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1072         ri.isize = cpu_to_je32(ilen);
1073         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1074         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1075         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1076         ri.data_crc = cpu_to_je32(0);
1077         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1078
1079         ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1080                                      JFFS2_SUMMARY_INODE_SIZE);
1081         if (ret) {
1082                 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1083                         sizeof(ri), ret);
1084                 return ret;
1085         }
1086         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1087
1088         if (IS_ERR(new_fn)) {
1089                 pr_warn("Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1090                 return PTR_ERR(new_fn);
1091         }
1092         if (je32_to_cpu(ri.version) == f->highest_version) {
1093                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1094                 if (f->metadata) {
1095                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1096                         jffs2_free_full_dnode(f->metadata);
1097                         f->metadata = NULL;
1098                 }
1099                 return 0;
1100         }
1101
1102         /*
1103          * We should only get here in the case where the node we are
1104          * replacing had more than one frag, so we kept the same version
1105          * number as before. (Except in case of error -- see 'goto fill;'
1106          * above.)
1107          */
1108         D1(if(unlikely(fn->frags <= 1)) {
1109                         pr_warn("%s(): Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1110                                 __func__, fn->frags, je32_to_cpu(ri.version),
1111                                 f->highest_version, je32_to_cpu(ri.ino));
1112         });
1113
1114         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1115         mark_ref_normal(new_fn->raw);
1116
1117         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1118              frag; frag = frag_next(frag)) {
1119                 if (frag->ofs > fn->size + fn->ofs)
1120                         break;
1121                 if (frag->node == fn) {
1122                         frag->node = new_fn;
1123                         new_fn->frags++;
1124                         fn->frags--;
1125                 }
1126         }
1127         if (fn->frags) {
1128                 pr_warn("%s(): Old node still has frags!\n", __func__);
1129                 BUG();
1130         }
1131         if (!new_fn->frags) {
1132                 pr_warn("%s(): New node has no frags!\n", __func__);
1133                 BUG();
1134         }
1135
1136         jffs2_mark_node_obsolete(c, fn->raw);
1137         jffs2_free_full_dnode(fn);
1138
1139         return 0;
1140 }
1141
1142 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1143                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1144                                        uint32_t start, uint32_t end)
1145 {
1146         struct jffs2_full_dnode *new_fn;
1147         struct jffs2_raw_inode ri;
1148         uint32_t alloclen, offset, orig_end, orig_start;
1149         int ret = 0;
1150         unsigned char *comprbuf = NULL, *writebuf;
1151         unsigned long pg;
1152         unsigned char *pg_ptr;
1153
1154         memset(&ri, 0, sizeof(ri));
1155
1156         jffs2_dbg(1, "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1157                   f->inocache->ino, start, end);
1158
1159         orig_end = end;
1160         orig_start = start;
1161
1162         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1163                 /* Attempt to do some merging. But only expand to cover logically
1164                    adjacent frags if the block containing them is already considered
1165                    to be dirty. Otherwise we end up with GC just going round in
1166                    circles dirtying the nodes it already wrote out, especially
1167                    on NAND where we have small eraseblocks and hence a much higher
1168                    chance of nodes having to be split to cross boundaries. */
1169
1170                 struct jffs2_node_frag *frag;
1171                 uint32_t min, max;
1172
1173                 min = start & ~(PAGE_CACHE_SIZE-1);
1174                 max = min + PAGE_CACHE_SIZE;
1175
1176                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1177
1178                 /* BUG_ON(!frag) but that'll happen anyway... */
1179
1180                 BUG_ON(frag->ofs != start);
1181
1182                 /* First grow down... */
1183                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1184
1185                         /* If the previous frag doesn't even reach the beginning, there's
1186                            excessive fragmentation. Just merge. */
1187                         if (frag->ofs > min) {
1188                                 jffs2_dbg(1, "Expanding down to cover partial frag (0x%x-0x%x)\n",
1189                                           frag->ofs, frag->ofs+frag->size);
1190                                 start = frag->ofs;
1191                                 continue;
1192                         }
1193                         /* OK. This frag holds the first byte of the page. */
1194                         if (!frag->node || !frag->node->raw) {
1195                                 jffs2_dbg(1, "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1196                                           frag->ofs, frag->ofs+frag->size);
1197                                 break;
1198                         } else {
1199
1200                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1201                                    in a block which is still considered clean? If so, don't obsolete it.
1202                                    If not, cover it anyway. */
1203
1204                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1205                                 struct jffs2_eraseblock *jeb;
1206
1207                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1208
1209                                 if (jeb == c->gcblock) {
1210                                         jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1211                                                   frag->ofs,
1212                                                   frag->ofs + frag->size,
1213                                                   ref_offset(raw));
1214                                         start = frag->ofs;
1215                                         break;
1216                                 }
1217                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1218                                         jffs2_dbg(1, "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1219                                                   frag->ofs,
1220                                                   frag->ofs + frag->size,
1221                                                   jeb->offset);
1222                                         break;
1223                                 }
1224
1225                                 jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1226                                           frag->ofs,
1227                                           frag->ofs + frag->size,
1228                                           jeb->offset);
1229                                 start = frag->ofs;
1230                                 break;
1231                         }
1232                 }
1233
1234                 /* ... then up */
1235
1236                 /* Find last frag which is actually part of the node we're to GC. */
1237                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1238
1239                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1240
1241                         /* If the previous frag doesn't even reach the beginning, there's lots
1242                            of fragmentation. Just merge. */
1243                         if (frag->ofs+frag->size < max) {
1244                                 jffs2_dbg(1, "Expanding up to cover partial frag (0x%x-0x%x)\n",
1245                                           frag->ofs, frag->ofs+frag->size);
1246                                 end = frag->ofs + frag->size;
1247                                 continue;
1248                         }
1249
1250                         if (!frag->node || !frag->node->raw) {
1251                                 jffs2_dbg(1, "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1252                                           frag->ofs, frag->ofs+frag->size);
1253                                 break;
1254                         } else {
1255
1256                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1257                                    in a block which is still considered clean? If so, don't obsolete it.
1258                                    If not, cover it anyway. */
1259
1260                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1261                                 struct jffs2_eraseblock *jeb;
1262
1263                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1264
1265                                 if (jeb == c->gcblock) {
1266                                         jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1267                                                   frag->ofs,
1268                                                   frag->ofs + frag->size,
1269                                                   ref_offset(raw));
1270                                         end = frag->ofs + frag->size;
1271                                         break;
1272                                 }
1273                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1274                                         jffs2_dbg(1, "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1275                                                   frag->ofs,
1276                                                   frag->ofs + frag->size,
1277                                                   jeb->offset);
1278                                         break;
1279                                 }
1280
1281                                 jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1282                                           frag->ofs,
1283                                           frag->ofs + frag->size,
1284                                           jeb->offset);
1285                                 end = frag->ofs + frag->size;
1286                                 break;
1287                         }
1288                 }
1289                 jffs2_dbg(1, "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1290                           orig_start, orig_end, start, end);
1291
1292                 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1293                 BUG_ON(end < orig_end);
1294                 BUG_ON(start > orig_start);
1295         }
1296
1297         /* First, use readpage() to read the appropriate page into the page cache */
1298         /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1299          *    triggered garbage collection in the first place?
1300          * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1301          *    page OK. We'll actually write it out again in commit_write, which is a little
1302          *    suboptimal, but at least we're correct.
1303          */
1304         pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1305
1306         if (IS_ERR(pg_ptr)) {
1307                 pr_warn("read_cache_page() returned error: %ld\n",
1308                         PTR_ERR(pg_ptr));
1309                 return PTR_ERR(pg_ptr);
1310         }
1311
1312         offset = start;
1313         while(offset < orig_end) {
1314                 uint32_t datalen;
1315                 uint32_t cdatalen;
1316                 uint16_t comprtype = JFFS2_COMPR_NONE;
1317
1318                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1319                                         &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1320
1321                 if (ret) {
1322                         pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1323                                 sizeof(ri) + JFFS2_MIN_DATA_LEN, ret);
1324                         break;
1325                 }
1326                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1327                 datalen = end - offset;
1328
1329                 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1330
1331                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1332
1333                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1334                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1335                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1336                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1337
1338                 ri.ino = cpu_to_je32(f->inocache->ino);
1339                 ri.version = cpu_to_je32(++f->highest_version);
1340                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1341                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1342                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1343                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1344                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1345                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1346                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1347                 ri.offset = cpu_to_je32(offset);
1348                 ri.csize = cpu_to_je32(cdatalen);
1349                 ri.dsize = cpu_to_je32(datalen);
1350                 ri.compr = comprtype & 0xff;
1351                 ri.usercompr = (comprtype >> 8) & 0xff;
1352                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1353                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1354
1355                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1356
1357                 jffs2_free_comprbuf(comprbuf, writebuf);
1358
1359                 if (IS_ERR(new_fn)) {
1360                         pr_warn("Error writing new dnode: %ld\n",
1361                                 PTR_ERR(new_fn));
1362                         ret = PTR_ERR(new_fn);
1363                         break;
1364                 }
1365                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1366                 offset += datalen;
1367                 if (f->metadata) {
1368                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1369                         jffs2_free_full_dnode(f->metadata);
1370                         f->metadata = NULL;
1371                 }
1372         }
1373
1374         jffs2_gc_release_page(c, pg_ptr, &pg);
1375         return ret;
1376 }