Gentoo Archives: gentoo-commits

From: "Christian Heim (phreak)" <phreak@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] hardened r95 - in hardened-sources/2.6/tags: . 2.6.23-5
Date: Wed, 30 Apr 2008 11:45:06
Message-Id: E1JrAg5-0001GE-R8@stork.gentoo.org
1 Author: phreak
2 Date: 2008-04-30 11:40:48 +0000 (Wed, 30 Apr 2008)
3 New Revision: 95
4
5 Added:
6 hardened-sources/2.6/tags/2.6.23-5/
7 hardened-sources/2.6/tags/2.6.23-5/4105_dm-bbr.patch
8 hardened-sources/2.6/tags/2.6.23-5/4300_squashfs-3.2-r2.patch
9 hardened-sources/2.6/tags/2.6.23-5/4405_alpha-sysctl-uac.patch
10 hardened-sources/2.6/tags/2.6.23-5/4450_grsec-2.1.11-2.6.23.15-20080210.patch
11 hardened-sources/2.6/tags/2.6.23-5/4455_grsec-2.1.10-mute-warnings.patch
12 hardened-sources/2.6/tags/2.6.23-5/4460_grsec-2.1.10-pax_curr_ip-fixes.patch
13 hardened-sources/2.6/tags/2.6.23-5/4465_grsec-kconfig-gentoo.patch
14 hardened-sources/2.6/tags/2.6.23-5/4470_selinux-avc_audit-log-curr_ip.patch
15 hardened-sources/2.6/tags/2.6.23-5/4475_compat_vdso-defconfig.patch
16 hardened-sources/2.6/tags/2.6.23-5/4480_x86_64-bogus-acpi-symbols-defconfig.patch
17 Log:
18 Importing patchset for 2.6.23-5 (from hardened-patches-2.6.23-5.extras.tar.bz2).
19
20 Added: hardened-sources/2.6/tags/2.6.23-5/4105_dm-bbr.patch
21 ===================================================================
22 --- hardened-sources/2.6/tags/2.6.23-5/4105_dm-bbr.patch (rev 0)
23 +++ hardened-sources/2.6/tags/2.6.23-5/4105_dm-bbr.patch 2008-04-30 11:40:48 UTC (rev 95)
24 @@ -0,0 +1,1181 @@
25 +BBR Target, updated by dsd@g.o
26 +
27 +Incomplete changelog:
28 + 2007/07/08: updated for new API in 2.6.22
29 +
30 +--- a/drivers/md/Kconfig
31 ++++ b/drivers/md/Kconfig
32 +@@ -276,4 +276,15 @@ config DM_DELAY
33 +
34 + If unsure, say N.
35 +
36 ++config BLK_DEV_DM_BBR
37 ++ tristate "Bad Block Relocation Device Target (EXPERIMENTAL)"
38 ++ depends on BLK_DEV_DM && EXPERIMENTAL
39 ++ ---help---
40 ++ Support for devices with software-based bad-block-relocation.
41 ++
42 ++ To compile this as a module, choose M here: the module will be
43 ++ called dm-bbr.
44 ++
45 ++ If unsure, say N.
46 ++
47 + endif # MD
48 +--- a/drivers/md/Makefile
49 ++++ b/drivers/md/Makefile
50 +@@ -39,6 +39,7 @@ obj-$(CONFIG_DM_MULTIPATH_RDAC) += dm-rd
51 + obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
52 + obj-$(CONFIG_DM_MIRROR) += dm-mirror.o
53 + obj-$(CONFIG_DM_ZERO) += dm-zero.o
54 ++obj-$(CONFIG_BLK_DEV_DM_BBR) += dm-bbr.o
55 +
56 + quiet_cmd_unroll = UNROLL $@
57 + cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
58 +--- /dev/null
59 ++++ b/drivers/md/dm-bbr.c
60 +@@ -0,0 +1,1012 @@
61 ++/*
62 ++ * (C) Copyright IBM Corp. 2002, 2004
63 ++ *
64 ++ * This program is free software; you can redistribute it and/or modify
65 ++ * it under the terms of the GNU General Public License as published by
66 ++ * the Free Software Foundation; either version 2 of the License, or
67 ++ * (at your option) any later version.
68 ++ *
69 ++ * This program is distributed in the hope that it will be useful,
70 ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
71 ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
72 ++ * the GNU General Public License for more details.
73 ++ *
74 ++ * You should have received a copy of the GNU General Public License
75 ++ * along with this program; if not, write to the Free Software
76 ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
77 ++ *
78 ++ * linux/drivers/md/dm-bbr.c
79 ++ *
80 ++ * Bad-block-relocation (BBR) target for device-mapper.
81 ++ *
82 ++ * The BBR target is designed to remap I/O write failures to another safe
83 ++ * location on disk. Note that most disk drives have BBR built into them,
84 ++ * this means that our software BBR will be only activated when all hardware
85 ++ * BBR replacement sectors have been used.
86 ++ */
87 ++
88 ++#include <linux/module.h>
89 ++#include <linux/init.h>
90 ++#include <linux/bio.h>
91 ++#include <linux/spinlock.h>
92 ++#include <linux/slab.h>
93 ++#include <linux/mempool.h>
94 ++#include <linux/workqueue.h>
95 ++#include <linux/vmalloc.h>
96 ++
97 ++#include "dm.h"
98 ++#include "dm-bio-list.h"
99 ++#include "dm-bio-record.h"
100 ++#include "dm-bbr.h"
101 ++#include "dm-io.h"
102 ++
103 ++#define DM_MSG_PREFIX "bbr"
104 ++#define SECTOR_SIZE (1 << SECTOR_SHIFT)
105 ++
106 ++static struct workqueue_struct *dm_bbr_wq = NULL;
107 ++static void bbr_remap_handler(struct work_struct *work);
108 ++static struct kmem_cache *bbr_remap_cache;
109 ++static struct kmem_cache *bbr_io_cache;
110 ++static mempool_t *bbr_io_pool;
111 ++
112 ++/**
113 ++ * bbr_binary_tree_destroy
114 ++ *
115 ++ * Destroy the binary tree.
116 ++ **/
117 ++static void bbr_binary_tree_destroy(struct bbr_runtime_remap *root)
118 ++{
119 ++ struct bbr_runtime_remap **link = NULL;
120 ++ struct bbr_runtime_remap *node = root;
121 ++
122 ++ while (node) {
123 ++ if (node->left) {
124 ++ link = &node->left;
125 ++ node = node->left;
126 ++ continue;
127 ++ }
128 ++ if (node->right) {
129 ++ link = &node->right;
130 ++ node = node->right;
131 ++ continue;
132 ++ }
133 ++
134 ++ kmem_cache_free(bbr_remap_cache, node);
135 ++ if (node == root) {
136 ++ /* If root is deleted, we're done. */
137 ++ break;
138 ++ }
139 ++
140 ++ /* Back to root. */
141 ++ node = root;
142 ++ *link = NULL;
143 ++ }
144 ++}
145 ++
146 ++static void bbr_free_remap(struct bbr_private *bbr_id)
147 ++{
148 ++ spin_lock_irq(&bbr_id->remap_root_lock);
149 ++ bbr_binary_tree_destroy(bbr_id->remap_root);
150 ++ bbr_id->remap_root = NULL;
151 ++ spin_unlock_irq(&bbr_id->remap_root_lock);
152 ++}
153 ++
154 ++static struct bbr_private *bbr_alloc_private(void)
155 ++{
156 ++ struct bbr_private *bbr_id;
157 ++
158 ++ bbr_id = kzalloc(sizeof(*bbr_id), GFP_KERNEL);
159 ++ if (bbr_id == NULL)
160 ++ return NULL;
161 ++
162 ++ INIT_WORK(&bbr_id->remap_work, bbr_remap_handler);
163 ++ spin_lock_init(&bbr_id->remap_root_lock);
164 ++ spin_lock_init(&bbr_id->remap_ios_lock);
165 ++ bbr_id->in_use_replacement_blks = (atomic_t) ATOMIC_INIT(0);
166 ++
167 ++ return bbr_id;
168 ++}
169 ++
170 ++static void bbr_free_private(struct bbr_private *bbr_id)
171 ++{
172 ++ vfree(bbr_id->bbr_table);
173 ++ bbr_free_remap(bbr_id);
174 ++ kfree(bbr_id);
175 ++}
176 ++
177 ++static u32 crc_table[256];
178 ++static u32 crc_table_built = 0;
179 ++
180 ++static void build_crc_table(void)
181 ++{
182 ++ u32 i, j, crc;
183 ++
184 ++ for (i = 0; i <= 255; i++) {
185 ++ crc = i;
186 ++ for (j = 8; j > 0; j--) {
187 ++ if (crc & 1)
188 ++ crc = (crc >> 1) ^ CRC_POLYNOMIAL;
189 ++ else
190 ++ crc >>= 1;
191 ++ }
192 ++ crc_table[i] = crc;
193 ++ }
194 ++ crc_table_built = 1;
195 ++}
196 ++
197 ++static u32 calculate_crc(u32 crc, void *buffer, u32 buffersize)
198 ++{
199 ++ unsigned char *current_byte;
200 ++ u32 temp1, temp2, i;
201 ++
202 ++ current_byte = (unsigned char *) buffer;
203 ++ /* Make sure the crc table is available */
204 ++ if (!crc_table_built)
205 ++ build_crc_table();
206 ++ /* Process each byte in the buffer. */
207 ++ for (i = 0; i < buffersize; i++) {
208 ++ temp1 = (crc >> 8) & 0x00FFFFFF;
209 ++ temp2 = crc_table[(crc ^ (u32) * current_byte) &
210 ++ (u32) 0xff];
211 ++ current_byte++;
212 ++ crc = temp1 ^ temp2;
213 ++ }
214 ++ return crc;
215 ++}
216 ++
217 ++/**
218 ++ * le_bbr_table_sector_to_cpu
219 ++ *
220 ++ * Convert bbr meta data from on-disk (LE) format
221 ++ * to the native cpu endian format.
222 ++ **/
223 ++static void le_bbr_table_sector_to_cpu(struct bbr_table *p)
224 ++{
225 ++ int i;
226 ++ p->signature = le32_to_cpup(&p->signature);
227 ++ p->crc = le32_to_cpup(&p->crc);
228 ++ p->sequence_number = le32_to_cpup(&p->sequence_number);
229 ++ p->in_use_cnt = le32_to_cpup(&p->in_use_cnt);
230 ++ for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
231 ++ p->entries[i].bad_sect =
232 ++ le64_to_cpup(&p->entries[i].bad_sect);
233 ++ p->entries[i].replacement_sect =
234 ++ le64_to_cpup(&p->entries[i].replacement_sect);
235 ++ }
236 ++}
237 ++
238 ++/**
239 ++ * cpu_bbr_table_sector_to_le
240 ++ *
241 ++ * Convert bbr meta data from cpu endian format to on-disk (LE) format
242 ++ **/
243 ++static void cpu_bbr_table_sector_to_le(struct bbr_table *p,
244 ++ struct bbr_table *le)
245 ++{
246 ++ int i;
247 ++ le->signature = cpu_to_le32p(&p->signature);
248 ++ le->crc = cpu_to_le32p(&p->crc);
249 ++ le->sequence_number = cpu_to_le32p(&p->sequence_number);
250 ++ le->in_use_cnt = cpu_to_le32p(&p->in_use_cnt);
251 ++ for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
252 ++ le->entries[i].bad_sect =
253 ++ cpu_to_le64p(&p->entries[i].bad_sect);
254 ++ le->entries[i].replacement_sect =
255 ++ cpu_to_le64p(&p->entries[i].replacement_sect);
256 ++ }
257 ++}
258 ++
259 ++/**
260 ++ * validate_bbr_table_sector
261 ++ *
262 ++ * Check the specified BBR table sector for a valid signature and CRC. If it's
263 ++ * valid, endian-convert the table sector.
264 ++ **/
265 ++static int validate_bbr_table_sector(struct bbr_table *p)
266 ++{
267 ++ int org_crc, final_crc;
268 ++
269 ++ if (le32_to_cpup(&p->signature) != BBR_TABLE_SIGNATURE) {
270 ++ DMERR("BBR table signature doesn't match!");
271 ++ DMERR("Found 0x%x. Expecting 0x%x",
272 ++ le32_to_cpup(&p->signature), BBR_TABLE_SIGNATURE);
273 ++ return -EINVAL;
274 ++ }
275 ++
276 ++ if (!p->crc) {
277 ++ DMERR("BBR table sector has no CRC!");
278 ++ return -EINVAL;
279 ++ }
280 ++
281 ++ org_crc = le32_to_cpup(&p->crc);
282 ++ p->crc = 0;
283 ++ final_crc = calculate_crc(INITIAL_CRC, (void *)p, sizeof(*p));
284 ++ if (final_crc != org_crc) {
285 ++ DMERR("CRC failed!");
286 ++ DMERR("Found 0x%x. Expecting 0x%x",
287 ++ org_crc, final_crc);
288 ++ return -EINVAL;
289 ++ }
290 ++
291 ++ p->crc = cpu_to_le32p(&org_crc);
292 ++ le_bbr_table_sector_to_cpu(p);
293 ++
294 ++ return 0;
295 ++}
296 ++
297 ++/**
298 ++ * bbr_binary_tree_insert
299 ++ *
300 ++ * Insert a node into the binary tree.
301 ++ **/
302 ++static void bbr_binary_tree_insert(struct bbr_runtime_remap **root,
303 ++ struct bbr_runtime_remap *newnode)
304 ++{
305 ++ struct bbr_runtime_remap **node = root;
306 ++ while (node && *node) {
307 ++ node = (newnode->remap.bad_sect > (*node)->remap.bad_sect) ?
308 ++ &(*node)->right : &(*node)->left;
309 ++ }
310 ++
311 ++ newnode->left = newnode->right = NULL;
312 ++ *node = newnode;
313 ++}
314 ++
315 ++/**
316 ++ * bbr_binary_search
317 ++ *
318 ++ * Search for a node that contains bad_sect == lsn.
319 ++ **/
320 ++static struct bbr_runtime_remap *bbr_binary_search(
321 ++ struct bbr_runtime_remap *root,
322 ++ u64 lsn)
323 ++{
324 ++ struct bbr_runtime_remap *node = root;
325 ++ while (node) {
326 ++ if (node->remap.bad_sect == lsn)
327 ++ break;
328 ++
329 ++ node = (lsn > node->remap.bad_sect) ? node->right : node->left;
330 ++ }
331 ++ return node;
332 ++}
333 ++
334 ++/**
335 ++ * bbr_insert_remap_entry
336 ++ *
337 ++ * Create a new remap entry and add it to the binary tree for this node.
338 ++ **/
339 ++static int bbr_insert_remap_entry(struct bbr_private *bbr_id,
340 ++ struct bbr_table_entry *new_bbr_entry)
341 ++{
342 ++ struct bbr_runtime_remap *newnode;
343 ++
344 ++ newnode = kmem_cache_alloc(bbr_remap_cache, GFP_NOIO);
345 ++ if (!newnode) {
346 ++ DMERR("Could not allocate from remap cache!");
347 ++ return -ENOMEM;
348 ++ }
349 ++ newnode->remap.bad_sect = new_bbr_entry->bad_sect;
350 ++ newnode->remap.replacement_sect = new_bbr_entry->replacement_sect;
351 ++ spin_lock_irq(&bbr_id->remap_root_lock);
352 ++ bbr_binary_tree_insert(&bbr_id->remap_root, newnode);
353 ++ spin_unlock_irq(&bbr_id->remap_root_lock);
354 ++ return 0;
355 ++}
356 ++
357 ++/**
358 ++ * bbr_table_to_remap_list
359 ++ *
360 ++ * The on-disk bbr table is sorted by the replacement sector LBA. In order to
361 ++ * improve run time performance, the in memory remap list must be sorted by
362 ++ * the bad sector LBA. This function is called at discovery time to initialize
363 ++ * the remap list. This function assumes that at least one copy of meta data
364 ++ * is valid.
365 ++ **/
366 ++static u32 bbr_table_to_remap_list(struct bbr_private *bbr_id)
367 ++{
368 ++ u32 in_use_blks = 0;
369 ++ int i, j;
370 ++ struct bbr_table *p;
371 ++
372 ++ for (i = 0, p = bbr_id->bbr_table;
373 ++ i < bbr_id->nr_sects_bbr_table;
374 ++ i++, p++) {
375 ++ if (!p->in_use_cnt)
376 ++ break;
377 ++
378 ++ in_use_blks += p->in_use_cnt;
379 ++ for (j = 0; j < p->in_use_cnt; j++)
380 ++ bbr_insert_remap_entry(bbr_id, &p->entries[j]);
381 ++ }
382 ++ if (in_use_blks) {
383 ++ char b[32];
384 ++ DMWARN("There are %u BBR entries for device %s",
385 ++ in_use_blks, format_dev_t(b, bbr_id->dev->bdev->bd_dev));
386 ++ }
387 ++
388 ++ return in_use_blks;
389 ++}
390 ++
391 ++/**
392 ++ * bbr_search_remap_entry
393 ++ *
394 ++ * Search remap entry for the specified sector. If found, return a pointer to
395 ++ * the table entry. Otherwise, return NULL.
396 ++ **/
397 ++static struct bbr_table_entry *bbr_search_remap_entry(
398 ++ struct bbr_private *bbr_id,
399 ++ u64 lsn)
400 ++{
401 ++ struct bbr_runtime_remap *p;
402 ++
403 ++ spin_lock_irq(&bbr_id->remap_root_lock);
404 ++ p = bbr_binary_search(bbr_id->remap_root, lsn);
405 ++ spin_unlock_irq(&bbr_id->remap_root_lock);
406 ++ return (p) ? &p->remap : NULL;
407 ++}
408 ++
409 ++/**
410 ++ * bbr_remap
411 ++ *
412 ++ * If *lsn is in the remap table, return TRUE and modify *lsn,
413 ++ * else, return FALSE.
414 ++ **/
415 ++static int bbr_remap(struct bbr_private *bbr_id,
416 ++ u64 *lsn)
417 ++{
418 ++ struct bbr_table_entry *e;
419 ++
420 ++ if (atomic_read(&bbr_id->in_use_replacement_blks)) {
421 ++ e = bbr_search_remap_entry(bbr_id, *lsn);
422 ++ if (e) {
423 ++ *lsn = e->replacement_sect;
424 ++ return 1;
425 ++ }
426 ++ }
427 ++ return 0;
428 ++}
429 ++
430 ++/**
431 ++ * bbr_remap_probe
432 ++ *
433 ++ * If any of the sectors in the range [lsn, lsn+nr_sects] are in the remap
434 ++ * table, return TRUE. Else, return FALSE.
435 ++ **/
436 ++static int bbr_remap_probe(struct bbr_private *bbr_id,
437 ++ u64 lsn, u64 nr_sects)
438 ++{
439 ++ u64 tmp, cnt;
440 ++
441 ++ if (atomic_read(&bbr_id->in_use_replacement_blks)) {
442 ++ for (cnt = 0, tmp = lsn;
443 ++ cnt < nr_sects;
444 ++ cnt += bbr_id->blksize_in_sects, tmp = lsn + cnt) {
445 ++ if (bbr_remap(bbr_id,&tmp))
446 ++ return 1;
447 ++ }
448 ++ }
449 ++ return 0;
450 ++}
451 ++
452 ++static int rw_table(struct bbr_private *bbr_id, void *vma,
453 ++ struct io_region *ptr, int rw)
454 ++{
455 ++ bbr_id->vma_io_req.bi_rw = rw;
456 ++ bbr_id->vma_io_req.mem.ptr.vma = vma;
457 ++ bbr_id->vma_io_req.notify.fn = NULL;
458 ++
459 ++ return dm_io(&bbr_id->vma_io_req, 1, ptr, NULL);
460 ++}
461 ++
462 ++static int io_sync(struct bbr_private *bbr_id, struct page_list *pl,
463 ++ unsigned offset, struct io_region *ptr, int rw)
464 ++{
465 ++ bbr_id->page_io_req.bi_rw = rw;
466 ++ bbr_id->page_io_req.mem.ptr.pl = pl;
467 ++ bbr_id->page_io_req.mem.offset = offset;
468 ++ bbr_id->page_io_req.notify.fn = NULL;
469 ++
470 ++ return dm_io(&bbr_id->page_io_req, 1, ptr, NULL);
471 ++}
472 ++
473 ++/**
474 ++ * bbr_setup
475 ++ *
476 ++ * Read the remap tables from disk and set up the initial remap tree.
477 ++ **/
478 ++static int bbr_setup(struct bbr_private *bbr_id)
479 ++{
480 ++ struct bbr_table *table = bbr_id->bbr_table;
481 ++ struct io_region job;
482 ++ int i, rc = 0;
483 ++
484 ++ job.bdev = bbr_id->dev->bdev;
485 ++ job.count = 1;
486 ++
487 ++ /* Read and verify each BBR table sector individually. */
488 ++ for (i = 0; i < bbr_id->nr_sects_bbr_table; i++, table++) {
489 ++ job.sector = bbr_id->lba_table1 + i;
490 ++ rc = rw_table(bbr_id, table, &job, READ);
491 ++ if (rc && bbr_id->lba_table2) {
492 ++ job.sector = bbr_id->lba_table2 + i;
493 ++ rc = rw_table(bbr_id, table, &job, READ);
494 ++ }
495 ++ if (rc)
496 ++ goto out;
497 ++
498 ++ rc = validate_bbr_table_sector(table);
499 ++ if (rc)
500 ++ goto out;
501 ++ }
502 ++ atomic_set(&bbr_id->in_use_replacement_blks,
503 ++ bbr_table_to_remap_list(bbr_id));
504 ++
505 ++out:
506 ++ if (rc)
507 ++ DMERR("error during device setup: %d", rc);
508 ++ return rc;
509 ++}
510 ++
511 ++/**
512 ++ * bbr_io_remap_error
513 ++ * @bbr_id: Private data for the BBR node.
514 ++ * @rw: READ or WRITE.
515 ++ * @starting_lsn: Starting sector of request to remap.
516 ++ * @count: Number of sectors in the request.
517 ++ * @page: Page containing the data for the request.
518 ++ * @offset: Byte-offset of the data within the page.
519 ++ *
520 ++ * For the requested range, try to write each sector individually. For each
521 ++ * sector that fails, find the next available remap location and write the
522 ++ * data to that new location. Then update the table and write both copies
523 ++ * of the table to disk. Finally, update the in-memory mapping and do any
524 ++ * other necessary bookkeeping.
525 ++ **/
526 ++static int bbr_io_remap_error(struct bbr_private *bbr_id,
527 ++ int rw,
528 ++ u64 starting_lsn,
529 ++ u64 count,
530 ++ struct page *page,
531 ++ unsigned int offset)
532 ++{
533 ++ struct bbr_table *bbr_table;
534 ++ struct io_region job;
535 ++ struct page_list pl;
536 ++ unsigned long table_sector_index;
537 ++ unsigned long table_sector_offset;
538 ++ unsigned long index;
539 ++ u64 lsn, new_lsn;
540 ++ char b[32];
541 ++ int rc;
542 ++
543 ++ job.bdev = bbr_id->dev->bdev;
544 ++ job.count = 1;
545 ++ pl.page = page;
546 ++ pl.next = NULL;
547 ++
548 ++ /* For each sector in the request. */
549 ++ for (lsn = 0; lsn < count; lsn++, offset += SECTOR_SIZE) {
550 ++ job.sector = starting_lsn + lsn;
551 ++ rc = io_sync(bbr_id, &pl, offset, &job, rw);
552 ++ while (rc) {
553 ++ /* Find the next available relocation sector. */
554 ++ new_lsn = atomic_read(&bbr_id->in_use_replacement_blks);
555 ++ if (new_lsn >= bbr_id->nr_replacement_blks) {
556 ++ /* No more replacement sectors available. */
557 ++ return -EIO;
558 ++ }
559 ++ new_lsn += bbr_id->start_replacement_sect;
560 ++
561 ++ /* Write the data to its new location. */
562 ++ DMWARN("device %s: Trying to remap bad sector "PFU64" to sector "PFU64,
563 ++ format_dev_t(b, bbr_id->dev->bdev->bd_dev),
564 ++ starting_lsn + lsn, new_lsn);
565 ++ job.sector = new_lsn;
566 ++ rc = io_sync(bbr_id, &pl, offset, &job, rw);
567 ++ if (rc) {
568 ++ /* This replacement sector is bad.
569 ++ * Try the next one.
570 ++ */
571 ++ DMERR("device %s: replacement sector "PFU64" is bad. Skipping.",
572 ++ format_dev_t(b, bbr_id->dev->bdev->bd_dev), new_lsn);
573 ++ atomic_inc(&bbr_id->in_use_replacement_blks);
574 ++ continue;
575 ++ }
576 ++
577 ++ /* Add this new entry to the on-disk table. */
578 ++ table_sector_index = new_lsn -
579 ++ bbr_id->start_replacement_sect;
580 ++ table_sector_offset = table_sector_index /
581 ++ BBR_ENTRIES_PER_SECT;
582 ++ index = table_sector_index % BBR_ENTRIES_PER_SECT;
583 ++
584 ++ bbr_table = &bbr_id->bbr_table[table_sector_offset];
585 ++ bbr_table->entries[index].bad_sect = starting_lsn + lsn;
586 ++ bbr_table->entries[index].replacement_sect = new_lsn;
587 ++ bbr_table->in_use_cnt++;
588 ++ bbr_table->sequence_number++;
589 ++ bbr_table->crc = 0;
590 ++ bbr_table->crc = calculate_crc(INITIAL_CRC,
591 ++ bbr_table,
592 ++ sizeof(struct bbr_table));
593 ++
594 ++ /* Write the table to disk. */
595 ++ cpu_bbr_table_sector_to_le(bbr_table, bbr_table);
596 ++ if (bbr_id->lba_table1) {
597 ++ job.sector = bbr_id->lba_table1 + table_sector_offset;
598 ++ rc = rw_table(bbr_id, bbr_table, &job, WRITE);
599 ++ }
600 ++ if (bbr_id->lba_table2) {
601 ++ job.sector = bbr_id->lba_table2 + table_sector_offset;
602 ++ rc |= rw_table(bbr_id, bbr_table, &job, WRITE);
603 ++ }
604 ++ le_bbr_table_sector_to_cpu(bbr_table);
605 ++
606 ++ if (rc) {
607 ++ /* Error writing one of the tables to disk. */
608 ++ DMERR("device %s: error updating BBR tables on disk.",
609 ++ format_dev_t(b, bbr_id->dev->bdev->bd_dev));
610 ++ return rc;
611 ++ }
612 ++
613 ++ /* Insert a new entry in the remapping binary-tree. */
614 ++ rc = bbr_insert_remap_entry(bbr_id,
615 ++ &bbr_table->entries[index]);
616 ++ if (rc) {
617 ++ DMERR("device %s: error adding new entry to remap tree.",
618 ++ format_dev_t(b, bbr_id->dev->bdev->bd_dev));
619 ++ return rc;
620 ++ }
621 ++
622 ++ atomic_inc(&bbr_id->in_use_replacement_blks);
623 ++ }
624 ++ }
625 ++
626 ++ return 0;
627 ++}
628 ++
629 ++/**
630 ++ * bbr_io_process_request
631 ++ *
632 ++ * For each sector in this request, check if the sector has already
633 ++ * been remapped. If so, process all previous sectors in the request,
634 ++ * followed by the remapped sector. Then reset the starting lsn and
635 ++ * count, and keep going with the rest of the request as if it were
636 ++ * a whole new request. If any of the sync_io's return an error,
637 ++ * call the remapper to relocate the bad sector(s).
638 ++ *
639 ++ * 2.5 Note: When switching over to bio's for the I/O path, we have made
640 ++ * the assumption that the I/O request described by the bio is one
641 ++ * virtually contiguous piece of memory (even though the bio vector
642 ++ * describes it using a series of physical page addresses).
643 ++ **/
644 ++static int bbr_io_process_request(struct bbr_private *bbr_id,
645 ++ struct bio *bio)
646 ++{
647 ++ struct io_region job;
648 ++ u64 starting_lsn = bio->bi_sector;
649 ++ u64 count, lsn, remapped_lsn;
650 ++ struct page_list pl;
651 ++ unsigned int offset;
652 ++ int i, rw = bio_data_dir(bio);
653 ++ int rc = 0;
654 ++
655 ++ job.bdev = bbr_id->dev->bdev;
656 ++ pl.next = NULL;
657 ++
658 ++ /* Each bio can contain multiple vectors, each with a different page.
659 ++ * Treat each vector as a separate request.
660 ++ */
661 ++ /* KMC: Is this the right way to walk the bvec list? */
662 ++ for (i = 0;
663 ++ i < bio->bi_vcnt;
664 ++ i++, bio->bi_idx++, starting_lsn += count) {
665 ++
666 ++ /* Bvec info: number of sectors, page,
667 ++ * and byte-offset within page.
668 ++ */
669 ++ count = bio_iovec(bio)->bv_len >> SECTOR_SHIFT;
670 ++ pl.page = bio_iovec(bio)->bv_page;
671 ++ offset = bio_iovec(bio)->bv_offset;
672 ++
673 ++ /* For each sector in this bvec, check if the sector has
674 ++ * already been remapped. If so, process all previous sectors
675 ++ * in this request, followed by the remapped sector. Then reset
676 ++ * the starting lsn and count and keep going with the rest of
677 ++ * the request as if it were a whole new request.
678 ++ */
679 ++ for (lsn = 0; lsn < count; lsn++) {
680 ++ remapped_lsn = starting_lsn + lsn;
681 ++ rc = bbr_remap(bbr_id, &remapped_lsn);
682 ++ if (!rc) {
683 ++ /* This sector is fine. */
684 ++ continue;
685 ++ }
686 ++
687 ++ /* Process all sectors in the request up to this one. */
688 ++ if (lsn > 0) {
689 ++ job.sector = starting_lsn;
690 ++ job.count = lsn;
691 ++ rc = io_sync(bbr_id, &pl, offset, &job, rw);
692 ++ if (rc) {
693 ++ /* If this I/O failed, then one of the
694 ++ * sectors in this request needs to be
695 ++ * relocated.
696 ++ */
697 ++ rc = bbr_io_remap_error(bbr_id, rw,
698 ++ starting_lsn,
699 ++ lsn, pl.page,
700 ++ offset);
701 ++ if (rc) {
702 ++ /* KMC: Return? Or continue to next bvec? */
703 ++ return rc;
704 ++ }
705 ++ }
706 ++ offset += (lsn << SECTOR_SHIFT);
707 ++ }
708 ++
709 ++ /* Process the remapped sector. */
710 ++ job.sector = remapped_lsn;
711 ++ job.count = 1;
712 ++ rc = io_sync(bbr_id, &pl, offset, &job, rw);
713 ++ if (rc) {
714 ++ /* BUGBUG - Need more processing if this caused
715 ++ * an error. If this I/O failed, then the
716 ++ * existing remap is now bad, and we need to
717 ++ * find a new remap. Can't use
718 ++ * bbr_io_remap_error(), because the existing
719 ++ * map entry needs to be changed, not added
720 ++ * again, and the original table entry also
721 ++ * needs to be changed.
722 ++ */
723 ++ return rc;
724 ++ }
725 ++
726 ++ starting_lsn += (lsn + 1);
727 ++ count -= (lsn + 1);
728 ++ lsn = -1;
729 ++ offset += SECTOR_SIZE;
730 ++ }
731 ++
732 ++ /* Check for any remaining sectors after the last split. This
733 ++ * could potentially be the whole request, but that should be a
734 ++ * rare case because requests should only be processed by the
735 ++ * thread if we know an error occurred or they contained one or
736 ++ * more remapped sectors.
737 ++ */
738 ++ if (count) {
739 ++ job.sector = starting_lsn;
740 ++ job.count = count;
741 ++ rc = io_sync(bbr_id, &pl, offset, &job, rw);
742 ++ if (rc) {
743 ++ /* If this I/O failed, then one of the sectors
744 ++ * in this request needs to be relocated.
745 ++ */
746 ++ rc = bbr_io_remap_error(bbr_id, rw, starting_lsn,
747 ++ count, pl.page, offset);
748 ++ if (rc) {
749 ++ /* KMC: Return? Or continue to next bvec? */
750 ++ return rc;
751 ++ }
752 ++ }
753 ++ }
754 ++ }
755 ++
756 ++ return 0;
757 ++}
758 ++
759 ++static void bbr_io_process_requests(struct bbr_private *bbr_id,
760 ++ struct bio *bio)
761 ++{
762 ++ struct bio *next;
763 ++ int rc;
764 ++
765 ++ while (bio) {
766 ++ next = bio->bi_next;
767 ++ bio->bi_next = NULL;
768 ++
769 ++ rc = bbr_io_process_request(bbr_id, bio);
770 ++
771 ++ bio_endio(bio, bio->bi_size, rc);
772 ++
773 ++ bio = next;
774 ++ }
775 ++}
776 ++
777 ++/**
778 ++ * bbr_remap_handler
779 ++ *
780 ++ * This is the handler for the bbr work-queue.
781 ++ *
782 ++ * I/O requests should only be sent to this handler if we know that:
783 ++ * a) the request contains at least one remapped sector.
784 ++ * or
785 ++ * b) the request caused an error on the normal I/O path.
786 ++ *
787 ++ * This function uses synchronous I/O, so sending a request to this
788 ++ * thread that doesn't need special processing will cause severe
789 ++ * performance degradation.
790 ++ **/
791 ++static void bbr_remap_handler(struct work_struct *work)
792 ++{
793 ++ struct bbr_private *bbr_id =
794 ++ container_of(work, struct bbr_private, remap_work);
795 ++ struct bio *bio;
796 ++ unsigned long flags;
797 ++
798 ++ spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
799 ++ bio = bio_list_get(&bbr_id->remap_ios);
800 ++ spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
801 ++
802 ++ bbr_io_process_requests(bbr_id, bio);
803 ++}
804 ++
805 ++/**
806 ++ * bbr_endio
807 ++ *
808 ++ * This is the callback for normal write requests. Check for an error
809 ++ * during the I/O, and send to the thread for processing if necessary.
810 ++ **/
811 ++static int bbr_endio(struct dm_target *ti, struct bio *bio,
812 ++ int error, union map_info *map_context)
813 ++{
814 ++ struct bbr_private *bbr_id = ti->private;
815 ++ struct dm_bio_details *bbr_io = map_context->ptr;
816 ++
817 ++ if (error && bbr_io) {
818 ++ unsigned long flags;
819 ++ char b[32];
820 ++
821 ++ dm_bio_restore(bbr_io, bio);
822 ++ map_context->ptr = NULL;
823 ++
824 ++ DMERR("device %s: I/O failure on sector %lu. "
825 ++ "Scheduling for retry.",
826 ++ format_dev_t(b, bbr_id->dev->bdev->bd_dev),
827 ++ (unsigned long)bio->bi_sector);
828 ++
829 ++ spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
830 ++ bio_list_add(&bbr_id->remap_ios, bio);
831 ++ spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
832 ++
833 ++ queue_work(dm_bbr_wq, &bbr_id->remap_work);
834 ++
835 ++ error = 1;
836 ++ }
837 ++
838 ++ if (bbr_io)
839 ++ mempool_free(bbr_io, bbr_io_pool);
840 ++
841 ++ return error;
842 ++}
843 ++
844 ++/**
845 ++ * Construct a bbr mapping
846 ++ **/
847 ++static int bbr_ctr(struct dm_target *ti, unsigned int argc, char **argv)
848 ++{
849 ++ struct bbr_private *bbr_id;
850 ++ unsigned long block_size;
851 ++ char *end;
852 ++ int rc = -EINVAL;
853 ++
854 ++ if (argc != 8) {
855 ++ ti->error = "dm-bbr requires exactly 8 arguments: "
856 ++ "device offset table1_lsn table2_lsn table_size start_replacement nr_replacement_blks block_size";
857 ++ goto out1;
858 ++ }
859 ++
860 ++ bbr_id = bbr_alloc_private();
861 ++ if (!bbr_id) {
862 ++ ti->error = "dm-bbr: Error allocating bbr private data.";
863 ++ goto out1;
864 ++ }
865 ++
866 ++ bbr_id->offset = simple_strtoull(argv[1], &end, 10);
867 ++ bbr_id->lba_table1 = simple_strtoull(argv[2], &end, 10);
868 ++ bbr_id->lba_table2 = simple_strtoull(argv[3], &end, 10);
869 ++ bbr_id->nr_sects_bbr_table = simple_strtoull(argv[4], &end, 10);
870 ++ bbr_id->start_replacement_sect = simple_strtoull(argv[5], &end, 10);
871 ++ bbr_id->nr_replacement_blks = simple_strtoull(argv[6], &end, 10);
872 ++ block_size = simple_strtoul(argv[7], &end, 10);
873 ++ bbr_id->blksize_in_sects = (block_size >> SECTOR_SHIFT);
874 ++
875 ++ bbr_id->vma_io_req.mem.type = DM_IO_VMA;
876 ++ bbr_id->vma_io_req.client = dm_io_client_create(1);
877 ++ if (IS_ERR(bbr_id->vma_io_req.client)) {
878 ++ rc = PTR_ERR(bbr_id->vma_io_req.client);
879 ++ DMWARN("couldn't allocate disk VMA io client");
880 ++ goto out2;
881 ++ }
882 ++
883 ++ bbr_id->page_io_req.mem.type = DM_IO_PAGE_LIST;
884 ++ bbr_id->page_io_req.client = dm_io_client_create(1);
885 ++ if (IS_ERR(bbr_id->page_io_req.client)) {
886 ++ rc = PTR_ERR(bbr_id->page_io_req.client);
887 ++ DMWARN("couldn't allocate pagelist io client");
888 ++ goto out3;
889 ++ }
890 ++
891 ++ bbr_id->bbr_table = vmalloc(bbr_id->nr_sects_bbr_table << SECTOR_SHIFT);
892 ++ if (!bbr_id->bbr_table) {
893 ++ ti->error = "dm-bbr: Error allocating bbr table.";
894 ++ goto out4;
895 ++ }
896 ++
897 ++ if (dm_get_device(ti, argv[0], 0, ti->len,
898 ++ dm_table_get_mode(ti->table), &bbr_id->dev)) {
899 ++ ti->error = "dm-bbr: Device lookup failed";
900 ++ goto out4;
901 ++ }
902 ++
903 ++ rc = bbr_setup(bbr_id);
904 ++ if (rc) {
905 ++ ti->error = "dm-bbr: Device setup failed";
906 ++ goto out5;
907 ++ }
908 ++
909 ++ ti->private = bbr_id;
910 ++ return 0;
911 ++
912 ++out5:
913 ++ dm_put_device(ti, bbr_id->dev);
914 ++out4:
915 ++ dm_io_client_destroy(bbr_id->page_io_req.client);
916 ++out3:
917 ++ dm_io_client_destroy(bbr_id->vma_io_req.client);
918 ++out2:
919 ++ bbr_free_private(bbr_id);
920 ++out1:
921 ++ return rc;
922 ++}
923 ++
924 ++static void bbr_dtr(struct dm_target *ti)
925 ++{
926 ++ struct bbr_private *bbr_id = ti->private;
927 ++
928 ++ dm_put_device(ti, bbr_id->dev);
929 ++ dm_io_client_destroy(bbr_id->page_io_req.client);
930 ++ dm_io_client_destroy(bbr_id->vma_io_req.client);
931 ++ bbr_free_private(bbr_id);
932 ++}
933 ++
934 ++static int bbr_map(struct dm_target *ti, struct bio *bio,
935 ++ union map_info *map_context)
936 ++{
937 ++ struct bbr_private *bbr_id = ti->private;
938 ++ struct dm_bio_details *bbr_io;
939 ++ unsigned long flags;
940 ++ int rc = 1;
941 ++
942 ++ bio->bi_sector += bbr_id->offset;
943 ++
944 ++ if (atomic_read(&bbr_id->in_use_replacement_blks) == 0 ||
945 ++ !bbr_remap_probe(bbr_id, bio->bi_sector, bio_sectors(bio))) {
946 ++ /* No existing remaps or this request doesn't
947 ++ * contain any remapped sectors.
948 ++ */
949 ++ bio->bi_bdev = bbr_id->dev->bdev;
950 ++
951 ++ bbr_io = mempool_alloc(bbr_io_pool, GFP_NOIO);
952 ++ dm_bio_record(bbr_io, bio);
953 ++ map_context->ptr = bbr_io;
954 ++ } else {
955 ++ /* This request has at least one remapped sector.
956 ++ * Give it to the work-queue for processing.
957 ++ */
958 ++ map_context->ptr = NULL;
959 ++ spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
960 ++ bio_list_add(&bbr_id->remap_ios, bio);
961 ++ spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
962 ++
963 ++ queue_work(dm_bbr_wq, &bbr_id->remap_work);
964 ++ rc = 0;
965 ++ }
966 ++
967 ++ return rc;
968 ++}
969 ++
970 ++static int bbr_status(struct dm_target *ti, status_type_t type,
971 ++ char *result, unsigned int maxlen)
972 ++{
973 ++ struct bbr_private *bbr_id = ti->private;
974 ++ char b[BDEVNAME_SIZE];
975 ++
976 ++ switch (type) {
977 ++ case STATUSTYPE_INFO:
978 ++ result[0] = '\0';
979 ++ break;
980 ++
981 ++ case STATUSTYPE_TABLE:
982 ++ snprintf(result, maxlen, "%s "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" %u",
983 ++ format_dev_t(b, bbr_id->dev->bdev->bd_dev),
984 ++ bbr_id->offset, bbr_id->lba_table1, bbr_id->lba_table2,
985 ++ bbr_id->nr_sects_bbr_table,
986 ++ bbr_id->start_replacement_sect,
987 ++ bbr_id->nr_replacement_blks,
988 ++ bbr_id->blksize_in_sects << SECTOR_SHIFT);
989 ++ break;
990 ++ }
991 ++ return 0;
992 ++}
993 ++
994 ++static struct target_type bbr_target = {
995 ++ .name = "bbr",
996 ++ .version= {1, 0, 1},
997 ++ .module = THIS_MODULE,
998 ++ .ctr = bbr_ctr,
999 ++ .dtr = bbr_dtr,
1000 ++ .map = bbr_map,
1001 ++ .end_io = bbr_endio,
1002 ++ .status = bbr_status,
1003 ++};
1004 ++
1005 ++int __init dm_bbr_init(void)
1006 ++{
1007 ++ int rc;
1008 ++
1009 ++ rc = dm_register_target(&bbr_target);
1010 ++ if (rc) {
1011 ++ DMERR("error registering target.");
1012 ++ goto err1;
1013 ++ }
1014 ++
1015 ++ bbr_remap_cache = kmem_cache_create("bbr-remap",
1016 ++ sizeof(struct bbr_runtime_remap),
1017 ++ 0, SLAB_HWCACHE_ALIGN, NULL);
1018 ++ if (!bbr_remap_cache) {
1019 ++ DMERR("error creating remap cache.");
1020 ++ rc = ENOMEM;
1021 ++ goto err2;
1022 ++ }
1023 ++
1024 ++ bbr_io_cache = kmem_cache_create("bbr-io", sizeof(struct dm_bio_details),
1025 ++ 0, SLAB_HWCACHE_ALIGN, NULL);
1026 ++ if (!bbr_io_cache) {
1027 ++ DMERR("error creating io cache.");
1028 ++ rc = ENOMEM;
1029 ++ goto err3;
1030 ++ }
1031 ++
1032 ++ bbr_io_pool = mempool_create(256, mempool_alloc_slab,
1033 ++ mempool_free_slab, bbr_io_cache);
1034 ++ if (!bbr_io_pool) {
1035 ++ DMERR("error creating io mempool.");
1036 ++ rc = ENOMEM;
1037 ++ goto err4;
1038 ++ }
1039 ++
1040 ++ dm_bbr_wq = create_workqueue("dm-bbr");
1041 ++ if (!dm_bbr_wq) {
1042 ++ DMERR("error creating work-queue.");
1043 ++ rc = ENOMEM;
1044 ++ goto err5;
1045 ++ }
1046 ++
1047 ++ return 0;
1048 ++
1049 ++err5:
1050 ++ mempool_destroy(bbr_io_pool);
1051 ++err4:
1052 ++ kmem_cache_destroy(bbr_io_cache);
1053 ++err3:
1054 ++ kmem_cache_destroy(bbr_remap_cache);
1055 ++err2:
1056 ++ dm_unregister_target(&bbr_target);
1057 ++err1:
1058 ++ return rc;
1059 ++}
1060 ++
1061 ++void __exit dm_bbr_exit(void)
1062 ++{
1063 ++ destroy_workqueue(dm_bbr_wq);
1064 ++ mempool_destroy(bbr_io_pool);
1065 ++ kmem_cache_destroy(bbr_io_cache);
1066 ++ kmem_cache_destroy(bbr_remap_cache);
1067 ++ dm_unregister_target(&bbr_target);
1068 ++}
1069 ++
1070 ++module_init(dm_bbr_init);
1071 ++module_exit(dm_bbr_exit);
1072 ++MODULE_LICENSE("GPL");
1073 +--- /dev/null
1074 ++++ b/drivers/md/dm-bbr.h
1075 +@@ -0,0 +1,130 @@
1076 ++/*
1077 ++ * (C) Copyright IBM Corp. 2002, 2004
1078 ++ *
1079 ++ * This program is free software; you can redistribute it and/or modify
1080 ++ * it under the terms of the GNU General Public License as published by
1081 ++ * the Free Software Foundation; either version 2 of the License, or
1082 ++ * (at your option) any later version.
1083 ++ *
1084 ++ * This program is distributed in the hope that it will be useful,
1085 ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1086 ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
1087 ++ * the GNU General Public License for more details.
1088 ++ *
1089 ++ * You should have received a copy of the GNU General Public License
1090 ++ * along with this program; if not, write to the Free Software
1091 ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1092 ++ *
1093 ++ * linux/drivers/md/dm-bbr.h
1094 ++ *
1095 ++ * Bad-block-relocation (BBR) target for device-mapper.
1096 ++ *
1097 ++ * The BBR target is designed to remap I/O write failures to another safe
1098 ++ * location on disk. Note that most disk drives have BBR built into them,
1099 ++ * this means that our software BBR will be only activated when all hardware
1100 ++ * BBR replacement sectors have been used.
1101 ++ */
1102 ++
1103 ++#include "dm-io.h"
1104 ++
1105 ++#define BBR_TABLE_SIGNATURE 0x42627254 /* BbrT */
1106 ++#define BBR_ENTRIES_PER_SECT 31
1107 ++#define INITIAL_CRC 0xFFFFFFFF
1108 ++#define CRC_POLYNOMIAL 0xEDB88320L
1109 ++
1110 ++/**
1111 ++ * Macros to cleanly print 64-bit numbers on both 32-bit and 64-bit machines.
1112 ++ * Use these in place of %Ld, %Lu, and %Lx.
1113 ++ **/
1114 ++#if BITS_PER_LONG > 32
1115 ++#define PFU64 "%llu"
1116 ++#else
1117 ++#define PFU64 "%Lu"
1118 ++#endif
1119 ++
1120 ++/**
1121 ++ * struct bbr_table_entry
1122 ++ * @bad_sect: LBA of bad location.
1123 ++ * @replacement_sect: LBA of new location.
1124 ++ *
1125 ++ * Structure to describe one BBR remap.
1126 ++ **/
1127 ++struct bbr_table_entry {
1128 ++ u64 bad_sect;
1129 ++ u64 replacement_sect;
1130 ++};
1131 ++
1132 ++/**
1133 ++ * struct bbr_table
1134 ++ * @signature: Signature on each BBR table sector.
1135 ++ * @crc: CRC for this table sector.
1136 ++ * @sequence_number: Used to resolve conflicts when primary and secondary
1137 ++ * tables do not match.
1138 ++ * @in_use_cnt: Number of in-use table entries.
1139 ++ * @entries: Actual table of remaps.
1140 ++ *
1141 ++ * Structure to describe each sector of the metadata table. Each sector in this
1142 ++ * table can describe 31 remapped sectors.
1143 ++ **/
1144 ++struct bbr_table {
1145 ++ u32 signature;
1146 ++ u32 crc;
1147 ++ u32 sequence_number;
1148 ++ u32 in_use_cnt;
1149 ++ struct bbr_table_entry entries[BBR_ENTRIES_PER_SECT];
1150 ++};
1151 ++
1152 ++/**
1153 ++ * struct bbr_runtime_remap
1154 ++ *
1155 ++ * Node in the binary tree used to keep track of remaps.
1156 ++ **/
1157 ++struct bbr_runtime_remap {
1158 ++ struct bbr_table_entry remap;
1159 ++ struct bbr_runtime_remap *left;
1160 ++ struct bbr_runtime_remap *right;
1161 ++};
1162 ++
1163 ++/**
1164 ++ * struct bbr_private
1165 ++ * @dev: Info about underlying device.
1166 ++ * @bbr_table: Copy of metadata table.
1167 ++ * @remap_root: Binary tree containing all remaps.
1168 ++ * @remap_root_lock: Lock for the binary tree.
1169 ++ * @remap_work: For adding work items to the work-queue.
1170 ++ * @remap_ios: List of I/Os for the work-queue to handle.
1171 ++ * @remap_ios_lock: Lock for the remap_ios list.
1172 ++ * @offset: LBA of data area.
1173 ++ * @lba_table1: LBA of primary BBR table.
1174 ++ * @lba_table2: LBA of secondary BBR table.
1175 ++ * @nr_sects_bbr_table: Size of each BBR table.
1176 ++ * @nr_replacement_blks: Number of replacement blocks.
1177 ++ * @start_replacement_sect: LBA of start of replacement blocks.
1178 ++ * @blksize_in_sects: Size of each block.
1179 ++ * @in_use_replacement_blks: Current number of remapped blocks.
1180 ++ *
1181 ++ * Private data for each BBR target.
1182 ++ **/
1183 ++struct bbr_private {
1184 ++ struct dm_dev *dev;
1185 ++ struct bbr_table *bbr_table;
1186 ++ struct bbr_runtime_remap *remap_root;
1187 ++ spinlock_t remap_root_lock;
1188 ++
1189 ++ struct dm_io_request vma_io_req;
1190 ++ struct dm_io_request page_io_req;
1191 ++
1192 ++ struct work_struct remap_work;
1193 ++ struct bio_list remap_ios;
1194 ++ spinlock_t remap_ios_lock;
1195 ++
1196 ++ u64 offset;
1197 ++ u64 lba_table1;
1198 ++ u64 lba_table2;
1199 ++ u64 nr_sects_bbr_table;
1200 ++ u64 start_replacement_sect;
1201 ++ u64 nr_replacement_blks;
1202 ++ u32 blksize_in_sects;
1203 ++ atomic_t in_use_replacement_blks;
1204 ++};
1205 ++
1206
1207 Added: hardened-sources/2.6/tags/2.6.23-5/4300_squashfs-3.2-r2.patch
1208 ===================================================================
1209 --- hardened-sources/2.6/tags/2.6.23-5/4300_squashfs-3.2-r2.patch (rev 0)
1210 +++ hardened-sources/2.6/tags/2.6.23-5/4300_squashfs-3.2-r2.patch 2008-04-30 11:40:48 UTC (rev 95)
1211 @@ -0,0 +1,4389 @@
1212 +---
1213 + fs/Kconfig | 65 +
1214 + fs/Makefile | 1
1215 + fs/squashfs/Makefile | 7
1216 + fs/squashfs/inode.c | 2327 +++++++++++++++++++++++++++++++++++++++++
1217 + fs/squashfs/squashfs.h | 87 +
1218 + fs/squashfs/squashfs2_0.c | 742 +++++++++++++
1219 + include/linux/squashfs_fs.h | 934 ++++++++++++++++
1220 + include/linux/squashfs_fs_i.h | 45
1221 + include/linux/squashfs_fs_sb.h | 74 +
1222 + init/do_mounts_rd.c | 16
1223 + 10 files changed, 4298 insertions(+)
1224 +
1225 +--- a/fs/Kconfig
1226 ++++ b/fs/Kconfig
1227 +@@ -1364,6 +1364,71 @@ config CRAMFS
1228 +
1229 + If unsure, say N.
1230 +
1231 ++config SQUASHFS
1232 ++ tristate "SquashFS 3.2 - Squashed file system support"
1233 ++ select ZLIB_INFLATE
1234 ++ help
1235 ++ Saying Y here includes support for SquashFS 3.2 (a Compressed Read-Only File
1236 ++ System). Squashfs is a highly compressed read-only filesystem for Linux.
1237 ++ It uses zlib compression to compress both files, inodes and directories.
1238 ++ Inodes in the system are very small and all blocks are packed to minimise
1239 ++ data overhead. Block sizes greater than 4K are supported up to a maximum of 64K.
1240 ++ SquashFS 3.1 supports 64 bit filesystems and files (larger than 4GB), full
1241 ++ uid/gid information, hard links and timestamps.
1242 ++
1243 ++ Squashfs is intended for general read-only filesystem use, for archival
1244 ++ use (i.e. in cases where a .tar.gz file may be used), and in embedded
1245 ++ systems where low overhead is needed. Further information and filesystem tools
1246 ++ are available from http://squashfs.sourceforge.net.
1247 ++
1248 ++ If you want to compile this as a module ( = code which can be
1249 ++ inserted in and removed from the running kernel whenever you want),
1250 ++ say M here and read <file:Documentation/modules.txt>. The module
1251 ++ will be called squashfs. Note that the root file system (the one
1252 ++ containing the directory /) cannot be compiled as a module.
1253 ++
1254 ++ If unsure, say N.
1255 ++
1256 ++config SQUASHFS_EMBEDDED
1257 ++
1258 ++ bool "Additional options for memory-constrained systems"
1259 ++ depends on SQUASHFS
1260 ++ default n
1261 ++ help
1262 ++ Saying Y here allows you to specify cache sizes and how Squashfs
1263 ++ allocates memory. This is only intended for memory constrained
1264 ++ systems.
1265 ++
1266 ++ If unsure, say N.
1267 ++
1268 ++config SQUASHFS_FRAGMENT_CACHE_SIZE
1269 ++ int "Number of fragments cached" if SQUASHFS_EMBEDDED
1270 ++ depends on SQUASHFS
1271 ++ default "3"
1272 ++ help
1273 ++ By default SquashFS caches the last 3 fragments read from
1274 ++ the filesystem. Increasing this amount may mean SquashFS
1275 ++ has to re-read fragments less often from disk, at the expense
1276 ++ of extra system memory. Decreasing this amount will mean
1277 ++ SquashFS uses less memory at the expense of extra reads from disk.
1278 ++
1279 ++ Note there must be at least one cached fragment. Anything
1280 ++ much more than three will probably not make much difference.
1281 ++
1282 ++config SQUASHFS_VMALLOC
1283 ++ bool "Use Vmalloc rather than Kmalloc" if SQUASHFS_EMBEDDED
1284 ++ depends on SQUASHFS
1285 ++ default n
1286 ++ help
1287 ++ By default SquashFS uses kmalloc to obtain fragment cache memory.
1288 ++ Kmalloc memory is the standard kernel allocator, but it can fail
1289 ++ on memory constrained systems. Because of the way Vmalloc works,
1290 ++ Vmalloc can succeed when kmalloc fails. Specifying this option
1291 ++ will make SquashFS always use Vmalloc to allocate the
1292 ++ fragment cache memory.
1293 ++
1294 ++ If unsure, say N.
1295 ++
1296 + config VXFS_FS
1297 + tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)"
1298 + depends on BLOCK
1299 +--- a/fs/Makefile
1300 ++++ b/fs/Makefile
1301 +@@ -72,6 +72,7 @@ obj-$(CONFIG_JBD) += jbd/
1302 + obj-$(CONFIG_JBD2) += jbd2/
1303 + obj-$(CONFIG_EXT2_FS) += ext2/
1304 + obj-$(CONFIG_CRAMFS) += cramfs/
1305 ++obj-$(CONFIG_SQUASHFS) += squashfs/
1306 + obj-$(CONFIG_RAMFS) += ramfs/
1307 + obj-$(CONFIG_HUGETLBFS) += hugetlbfs/
1308 + obj-$(CONFIG_CODA_FS) += coda/
1309 +--- /dev/null
1310 ++++ b/fs/squashfs/Makefile
1311 +@@ -0,0 +1,7 @@
1312 ++#
1313 ++# Makefile for the linux squashfs routines.
1314 ++#
1315 ++
1316 ++obj-$(CONFIG_SQUASHFS) += squashfs.o
1317 ++squashfs-y += inode.o
1318 ++squashfs-y += squashfs2_0.o
1319 +--- /dev/null
1320 ++++ b/fs/squashfs/inode.c
1321 +@@ -0,0 +1,2329 @@
1322 ++/*
1323 ++ * Squashfs - a compressed read only filesystem for Linux
1324 ++ *
1325 ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007
1326 ++ * Phillip Lougher <phillip@×××××××××××.uk>
1327 ++ *
1328 ++ * This program is free software; you can redistribute it and/or
1329 ++ * modify it under the terms of the GNU General Public License
1330 ++ * as published by the Free Software Foundation; either version 2,
1331 ++ * or (at your option) any later version.
1332 ++ *
1333 ++ * This program is distributed in the hope that it will be useful,
1334 ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1335 ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1336 ++ * GNU General Public License for more details.
1337 ++ *
1338 ++ * You should have received a copy of the GNU General Public License
1339 ++ * along with this program; if not, write to the Free Software
1340 ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
1341 ++ *
1342 ++ * inode.c
1343 ++ */
1344 ++
1345 ++#include <linux/squashfs_fs.h>
1346 ++#include <linux/module.h>
1347 ++#include <linux/zlib.h>
1348 ++#include <linux/exportfs.h>
1349 ++#include <linux/fs.h>
1350 ++#include <linux/squashfs_fs_sb.h>
1351 ++#include <linux/squashfs_fs_i.h>
1352 ++#include <linux/buffer_head.h>
1353 ++#include <linux/vfs.h>
1354 ++#include <linux/vmalloc.h>
1355 ++#include <linux/smp_lock.h>
1356 ++#include <linux/sched.h>
1357 ++
1358 ++#include "squashfs.h"
1359 ++
1360 ++static void vfs_read_inode(struct inode *i);
1361 ++static struct dentry *squashfs_get_parent(struct dentry *child);
1362 ++static int squashfs_read_inode(struct inode *i, squashfs_inode_t inode);
1363 ++static int squashfs_statfs(struct dentry *, struct kstatfs *);
1364 ++static int squashfs_symlink_readpage(struct file *file, struct page *page);
1365 ++static long long read_blocklist(struct inode *inode, int index,
1366 ++ int readahead_blks, char *block_list,
1367 ++ unsigned short **block_p, unsigned int *bsize);
1368 ++static int squashfs_readpage(struct file *file, struct page *page);
1369 ++static int squashfs_readpage4K(struct file *file, struct page *page);
1370 ++static int squashfs_readdir(struct file *, void *, filldir_t);
1371 ++static struct dentry *squashfs_lookup(struct inode *, struct dentry *,
1372 ++ struct nameidata *);
1373 ++static int squashfs_remount(struct super_block *s, int *flags, char *data);
1374 ++static void squashfs_put_super(struct super_block *);
1375 ++static int squashfs_get_sb(struct file_system_type *,int, const char *, void *,
1376 ++ struct vfsmount *);
1377 ++static struct inode *squashfs_alloc_inode(struct super_block *sb);
1378 ++static void squashfs_destroy_inode(struct inode *inode);
1379 ++static int init_inodecache(void);
1380 ++static void destroy_inodecache(void);
1381 ++
1382 ++static struct file_system_type squashfs_fs_type = {
1383 ++ .owner = THIS_MODULE,
1384 ++ .name = "squashfs",
1385 ++ .get_sb = squashfs_get_sb,
1386 ++ .kill_sb = kill_block_super,
1387 ++ .fs_flags = FS_REQUIRES_DEV
1388 ++};
1389 ++
1390 ++static const unsigned char squashfs_filetype_table[] = {
1391 ++ DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_FIFO, DT_SOCK
1392 ++};
1393 ++
1394 ++static struct super_operations squashfs_super_ops = {
1395 ++ .alloc_inode = squashfs_alloc_inode,
1396 ++ .destroy_inode = squashfs_destroy_inode,
1397 ++ .statfs = squashfs_statfs,
1398 ++ .put_super = squashfs_put_super,
1399 ++ .remount_fs = squashfs_remount
1400 ++};
1401 ++
1402 ++static struct super_operations squashfs_export_super_ops = {
1403 ++ .alloc_inode = squashfs_alloc_inode,
1404 ++ .destroy_inode = squashfs_destroy_inode,
1405 ++ .statfs = squashfs_statfs,
1406 ++ .put_super = squashfs_put_super,
1407 ++ .read_inode = vfs_read_inode
1408 ++};
1409 ++
1410 ++static struct export_operations squashfs_export_ops = {
1411 ++ .get_parent = squashfs_get_parent
1412 ++};
1413 ++
1414 ++SQSH_EXTERN const struct address_space_operations squashfs_symlink_aops = {
1415 ++ .readpage = squashfs_symlink_readpage
1416 ++};
1417 ++
1418 ++SQSH_EXTERN const struct address_space_operations squashfs_aops = {
1419 ++ .readpage = squashfs_readpage
1420 ++};
1421 ++
1422 ++SQSH_EXTERN const struct address_space_operations squashfs_aops_4K = {
1423 ++ .readpage = squashfs_readpage4K
1424 ++};
1425 ++
1426 ++static const struct file_operations squashfs_dir_ops = {
1427 ++ .read = generic_read_dir,
1428 ++ .readdir = squashfs_readdir
1429 ++};
1430 ++
1431 ++SQSH_EXTERN struct inode_operations squashfs_dir_inode_ops = {
1432 ++ .lookup = squashfs_lookup
1433 ++};
1434 ++
1435 ++
1436 ++static struct buffer_head *get_block_length(struct super_block *s,
1437 ++ int *cur_index, int *offset, int *c_byte)
1438 ++{
1439 ++ struct squashfs_sb_info *msblk = s->s_fs_info;
1440 ++ unsigned short temp;
1441 ++ struct buffer_head *bh;
1442 ++
1443 ++ if (!(bh = sb_bread(s, *cur_index)))
1444 ++ goto out;
1445 ++
1446 ++ if (msblk->devblksize - *offset == 1) {
1447 ++ if (msblk->swap)
1448 ++ ((unsigned char *) &temp)[1] = *((unsigned char *)
1449 ++ (bh->b_data + *offset));
1450 ++ else
1451 ++ ((unsigned char *) &temp)[0] = *((unsigned char *)
1452 ++ (bh->b_data + *offset));
1453 ++ brelse(bh);
1454 ++ if (!(bh = sb_bread(s, ++(*cur_index))))
1455 ++ goto out;
1456 ++ if (msblk->swap)
1457 ++ ((unsigned char *) &temp)[0] = *((unsigned char *)
1458 ++ bh->b_data);
1459 ++ else
1460 ++ ((unsigned char *) &temp)[1] = *((unsigned char *)
1461 ++ bh->b_data);
1462 ++ *c_byte = temp;
1463 ++ *offset = 1;
1464 ++ } else {
1465 ++ if (msblk->swap) {
1466 ++ ((unsigned char *) &temp)[1] = *((unsigned char *)
1467 ++ (bh->b_data + *offset));
1468 ++ ((unsigned char *) &temp)[0] = *((unsigned char *)
1469 ++ (bh->b_data + *offset + 1));
1470 ++ } else {
1471 ++ ((unsigned char *) &temp)[0] = *((unsigned char *)
1472 ++ (bh->b_data + *offset));
1473 ++ ((unsigned char *) &temp)[1] = *((unsigned char *)
1474 ++ (bh->b_data + *offset + 1));
1475 ++ }
1476 ++ *c_byte = temp;
1477 ++ *offset += 2;
1478 ++ }
1479 ++
1480 ++ if (SQUASHFS_CHECK_DATA(msblk->sblk.flags)) {
1481 ++ if (*offset == msblk->devblksize) {
1482 ++ brelse(bh);
1483 ++ if (!(bh = sb_bread(s, ++(*cur_index))))
1484 ++ goto out;
1485 ++ *offset = 0;
1486 ++ }
1487 ++ if (*((unsigned char *) (bh->b_data + *offset)) !=
1488 ++ SQUASHFS_MARKER_BYTE) {
1489 ++ ERROR("Metadata block marker corrupt @ %x\n",
1490 ++ *cur_index);
1491 ++ brelse(bh);
1492 ++ goto out;
1493 ++ }
1494 ++ (*offset)++;
1495 ++ }
1496 ++ return bh;
1497 ++
1498 ++out:
1499 ++ return NULL;
1500 ++}
1501 ++
1502 ++
1503 ++SQSH_EXTERN unsigned int squashfs_read_data(struct super_block *s, char *buffer,
1504 ++ long long index, unsigned int length,
1505 ++ long long *next_index, int srclength)
1506 ++{
1507 ++ struct squashfs_sb_info *msblk = s->s_fs_info;
1508 ++ struct squashfs_super_block *sblk = &msblk->sblk;
1509 ++ struct buffer_head *bh[((SQUASHFS_FILE_MAX_SIZE - 1) >>
1510 ++ msblk->devblksize_log2) + 2];
1511 ++ unsigned int offset = index & ((1 << msblk->devblksize_log2) - 1);
1512 ++ unsigned int cur_index = index >> msblk->devblksize_log2;
1513 ++ int bytes, avail_bytes, b = 0, k = 0;
1514 ++ unsigned int compressed;
1515 ++ unsigned int c_byte = length;
1516 ++
1517 ++ if (c_byte) {
1518 ++ bytes = msblk->devblksize - offset;
1519 ++ compressed = SQUASHFS_COMPRESSED_BLOCK(c_byte);
1520 ++ c_byte = SQUASHFS_COMPRESSED_SIZE_BLOCK(c_byte);
1521 ++
1522 ++ TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", index, compressed
1523 ++ ? "" : "un", (unsigned int) c_byte, srclength);
1524 ++
1525 ++ if (c_byte > srclength || index < 0 || (index + c_byte) > sblk->bytes_used)
1526 ++ goto read_failure;
1527 ++
1528 ++ if (!(bh[0] = sb_getblk(s, cur_index)))
1529 ++ goto block_release;
1530 ++
1531 ++ for (b = 1; bytes < c_byte; b++) {
1532 ++ if (!(bh[b] = sb_getblk(s, ++cur_index)))
1533 ++ goto block_release;
1534 ++ bytes += msblk->devblksize;
1535 ++ }
1536 ++ ll_rw_block(READ, b, bh);
1537 ++ } else {
1538 ++ if (index < 0 || (index + 2) > sblk->bytes_used)
1539 ++ goto read_failure;
1540 ++
1541 ++ if (!(bh[0] = get_block_length(s, &cur_index, &offset,
1542 ++ &c_byte)))
1543 ++ goto read_failure;
1544 ++
1545 ++ bytes = msblk->devblksize - offset;
1546 ++ compressed = SQUASHFS_COMPRESSED(c_byte);
1547 ++ c_byte = SQUASHFS_COMPRESSED_SIZE(c_byte);
1548 ++
1549 ++ TRACE("Block @ 0x%llx, %scompressed size %d\n", index, compressed
1550 ++ ? "" : "un", (unsigned int) c_byte);
1551 ++
1552 ++ if (c_byte > srclength || (index + c_byte) > sblk->bytes_used)
1553 ++ goto read_failure;
1554 ++
1555 ++ for (b = 1; bytes < c_byte; b++) {
1556 ++ if (!(bh[b] = sb_getblk(s, ++cur_index)))
1557 ++ goto block_release;
1558 ++ bytes += msblk->devblksize;
1559 ++ }
1560 ++ ll_rw_block(READ, b - 1, bh + 1);
1561 ++ }
1562 ++
1563 ++ if (compressed) {
1564 ++ int zlib_err = 0;
1565 ++
1566 ++ /*
1567 ++ * uncompress block
1568 ++ */
1569 ++
1570 ++ mutex_lock(&msblk->read_data_mutex);
1571 ++
1572 ++ msblk->stream.next_out = buffer;
1573 ++ msblk->stream.avail_out = srclength;
1574 ++
1575 ++ for (bytes = 0; k < b; k++) {
1576 ++ avail_bytes = (c_byte - bytes) > (msblk->devblksize - offset) ?
1577 ++ msblk->devblksize - offset :
1578 ++ c_byte - bytes;
1579 ++ wait_on_buffer(bh[k]);
1580 ++ if (!buffer_uptodate(bh[k]))
1581 ++ goto release_mutex;
1582 ++
1583 ++ msblk->stream.next_in = bh[k]->b_data + offset;
1584 ++ msblk->stream.avail_in = avail_bytes;
1585 ++
1586 ++ if (k == 0) {
1587 ++ zlib_err = zlib_inflateInit(&msblk->stream);
1588 ++ if (zlib_err != Z_OK) {
1589 ++ ERROR("zlib_inflateInit returned unexpected result 0x%x, srclength %d\n",
1590 ++ zlib_err, srclength);
1591 ++ goto release_mutex;
1592 ++ }
1593 ++
1594 ++ if (avail_bytes == 0) {
1595 ++ offset = 0;
1596 ++ brelse(bh[k]);
1597 ++ continue;
1598 ++ }
1599 ++ }
1600 ++
1601 ++ zlib_err = zlib_inflate(&msblk->stream, Z_NO_FLUSH);
1602 ++ if (zlib_err != Z_OK && zlib_err != Z_STREAM_END) {
1603 ++ ERROR("zlib_inflate returned unexpected result 0x%x, srclength %d, avail_in %d, avail_out %d\n",
1604 ++ zlib_err, srclength, msblk->stream.avail_in, msblk->stream.avail_out);
1605 ++ goto release_mutex;
1606 ++ }
1607 ++
1608 ++ bytes += avail_bytes;
1609 ++ offset = 0;
1610 ++ brelse(bh[k]);
1611 ++ }
1612 ++
1613 ++ if (zlib_err != Z_STREAM_END)
1614 ++ goto release_mutex;
1615 ++
1616 ++ zlib_err = zlib_inflateEnd(&msblk->stream);
1617 ++ if (zlib_err != Z_OK) {
1618 ++ ERROR("zlib_inflateEnd returned unexpected result 0x%x, srclength %d\n",
1619 ++ zlib_err, srclength);
1620 ++ goto release_mutex;
1621 ++ }
1622 ++ bytes = msblk->stream.total_out;
1623 ++ mutex_unlock(&msblk->read_data_mutex);
1624 ++ } else {
1625 ++ int i;
1626 ++
1627 ++ for(i = 0; i < b; i++) {
1628 ++ wait_on_buffer(bh[i]);
1629 ++ if(!buffer_uptodate(bh[i]))
1630 ++ goto block_release;
1631 ++ }
1632 ++
1633 ++ for (bytes = 0; k < b; k++) {
1634 ++ avail_bytes = (c_byte - bytes) > (msblk->devblksize - offset) ?
1635 ++ msblk->devblksize - offset :
1636 ++ c_byte - bytes;
1637 ++ memcpy(buffer + bytes, bh[k]->b_data + offset, avail_bytes);
1638 ++ bytes += avail_bytes;
1639 ++ offset = 0;
1640 ++ brelse(bh[k]);
1641 ++ }
1642 ++ }
1643 ++
1644 ++ if (next_index)
1645 ++ *next_index = index + c_byte + (length ? 0 :
1646 ++ (SQUASHFS_CHECK_DATA(msblk->sblk.flags)
1647 ++ ? 3 : 2));
1648 ++ return bytes;
1649 ++
1650 ++release_mutex:
1651 ++ mutex_unlock(&msblk->read_data_mutex);
1652 ++
1653 ++block_release:
1654 ++ for (; k < b; k++)
1655 ++ brelse(bh[k]);
1656 ++
1657 ++read_failure:
1658 ++ ERROR("sb_bread failed reading block 0x%x\n", cur_index);
1659 ++ return 0;
1660 ++}
1661 ++
1662 ++
1663 ++SQSH_EXTERN int squashfs_get_cached_block(struct super_block *s, char *buffer,
1664 ++ long long block, unsigned int offset,
1665 ++ int length, long long *next_block,
1666 ++ unsigned int *next_offset)
1667 ++{
1668 ++ struct squashfs_sb_info *msblk = s->s_fs_info;
1669 ++ int n, i, bytes, return_length = length;
1670 ++ long long next_index;
1671 ++
1672 ++ TRACE("Entered squashfs_get_cached_block [%llx:%x]\n", block, offset);
1673 ++
1674 ++ while ( 1 ) {
1675 ++ for (i = 0; i < SQUASHFS_CACHED_BLKS; i++)
1676 ++ if (msblk->block_cache[i].block == block)
1677 ++ break;
1678 ++
1679 ++ mutex_lock(&msblk->block_cache_mutex);
1680 ++
1681 ++ if (i == SQUASHFS_CACHED_BLKS) {
1682 ++ /* read inode header block */
1683 ++ for (i = msblk->next_cache, n = SQUASHFS_CACHED_BLKS;
1684 ++ n ; n --, i = (i + 1) %
1685 ++ SQUASHFS_CACHED_BLKS)
1686 ++ if (msblk->block_cache[i].block !=
1687 ++ SQUASHFS_USED_BLK)
1688 ++ break;
1689 ++
1690 ++ if (n == 0) {
1691 ++ wait_queue_t wait;
1692 ++
1693 ++ init_waitqueue_entry(&wait, current);
1694 ++ add_wait_queue(&msblk->waitq, &wait);
1695 ++ set_current_state(TASK_UNINTERRUPTIBLE);
1696 ++ mutex_unlock(&msblk->block_cache_mutex);
1697 ++ schedule();
1698 ++ set_current_state(TASK_RUNNING);
1699 ++ remove_wait_queue(&msblk->waitq, &wait);
1700 ++ continue;
1701 ++ }
1702 ++ msblk->next_cache = (i + 1) % SQUASHFS_CACHED_BLKS;
1703 ++
1704 ++ if (msblk->block_cache[i].block ==
1705 ++ SQUASHFS_INVALID_BLK) {
1706 ++ if (!(msblk->block_cache[i].data =
1707 ++ kmalloc(SQUASHFS_METADATA_SIZE,
1708 ++ GFP_KERNEL))) {
1709 ++ ERROR("Failed to allocate cache"
1710 ++ "block\n");
1711 ++ mutex_unlock(&msblk->block_cache_mutex);
1712 ++ goto out;
1713 ++ }
1714 ++ }
1715 ++
1716 ++ msblk->block_cache[i].block = SQUASHFS_USED_BLK;
1717 ++ mutex_unlock(&msblk->block_cache_mutex);
1718 ++
1719 ++ msblk->block_cache[i].length = squashfs_read_data(s,
1720 ++ msblk->block_cache[i].data, block, 0, &next_index, SQUASHFS_METADATA_SIZE);
1721 ++ if (msblk->block_cache[i].length == 0) {
1722 ++ ERROR("Unable to read cache block [%llx:%x]\n",
1723 ++ block, offset);
1724 ++ mutex_lock(&msblk->block_cache_mutex);
1725 ++ msblk->block_cache[i].block = SQUASHFS_INVALID_BLK;
1726 ++ kfree(msblk->block_cache[i].data);
1727 ++ wake_up(&msblk->waitq);
1728 ++ mutex_unlock(&msblk->block_cache_mutex);
1729 ++ goto out;
1730 ++ }
1731 ++
1732 ++ mutex_lock(&msblk->block_cache_mutex);
1733 ++ wake_up(&msblk->waitq);
1734 ++ msblk->block_cache[i].block = block;
1735 ++ msblk->block_cache[i].next_index = next_index;
1736 ++ TRACE("Read cache block [%llx:%x]\n", block, offset);
1737 ++ }
1738 ++
1739 ++ if (msblk->block_cache[i].block != block) {
1740 ++ mutex_unlock(&msblk->block_cache_mutex);
1741 ++ continue;
1742 ++ }
1743 ++
1744 ++ bytes = msblk->block_cache[i].length - offset;
1745 ++
1746 ++ if (bytes < 1) {
1747 ++ mutex_unlock(&msblk->block_cache_mutex);
1748 ++ goto out;
1749 ++ } else if (bytes >= length) {
1750 ++ if (buffer)
1751 ++ memcpy(buffer, msblk->block_cache[i].data +
1752 ++ offset, length);
1753 ++ if (msblk->block_cache[i].length - offset == length) {
1754 ++ *next_block = msblk->block_cache[i].next_index;
1755 ++ *next_offset = 0;
1756 ++ } else {
1757 ++ *next_block = block;
1758 ++ *next_offset = offset + length;
1759 ++ }
1760 ++ mutex_unlock(&msblk->block_cache_mutex);
1761 ++ goto finish;
1762 ++ } else {
1763 ++ if (buffer) {
1764 ++ memcpy(buffer, msblk->block_cache[i].data +
1765 ++ offset, bytes);
1766 ++ buffer += bytes;
1767 ++ }
1768 ++ block = msblk->block_cache[i].next_index;
1769 ++ mutex_unlock(&msblk->block_cache_mutex);
1770 ++ length -= bytes;
1771 ++ offset = 0;
1772 ++ }
1773 ++ }
1774 ++
1775 ++finish:
1776 ++ return return_length;
1777 ++out:
1778 ++ return 0;
1779 ++}
1780 ++
1781 ++
1782 ++static int get_fragment_location(struct super_block *s, unsigned int fragment,
1783 ++ long long *fragment_start_block,
1784 ++ unsigned int *fragment_size)
1785 ++{
1786 ++ struct squashfs_sb_info *msblk = s->s_fs_info;
1787 ++ long long start_block =
1788 ++ msblk->fragment_index[SQUASHFS_FRAGMENT_INDEX(fragment)];
1789 ++ int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment);
1790 ++ struct squashfs_fragment_entry fragment_entry;
1791 ++
1792 ++ if (msblk->swap) {
1793 ++ struct squashfs_fragment_entry sfragment_entry;
1794 ++
1795 ++ if (!squashfs_get_cached_block(s, (char *) &sfragment_entry,
1796 ++ start_block, offset,
1797 ++ sizeof(sfragment_entry), &start_block,
1798 ++ &offset))
1799 ++ goto out;
1800 ++ SQUASHFS_SWAP_FRAGMENT_ENTRY(&fragment_entry, &sfragment_entry);
1801 ++ } else
1802 ++ if (!squashfs_get_cached_block(s, (char *) &fragment_entry,
1803 ++ start_block, offset,
1804 ++ sizeof(fragment_entry), &start_block,
1805 ++ &offset))
1806 ++ goto out;
1807 ++
1808 ++ *fragment_start_block = fragment_entry.start_block;
1809 ++ *fragment_size = fragment_entry.size;
1810 ++
1811 ++ return 1;
1812 ++
1813 ++out:
1814 ++ return 0;
1815 ++}
1816 ++
1817 ++
1818 ++SQSH_EXTERN void release_cached_fragment(struct squashfs_sb_info *msblk, struct
1819 ++ squashfs_fragment_cache *fragment)
1820 ++{
1821 ++ mutex_lock(&msblk->fragment_mutex);
1822 ++ fragment->locked --;
1823 ++ wake_up(&msblk->fragment_wait_queue);
1824 ++ mutex_unlock(&msblk->fragment_mutex);
1825 ++}
1826 ++
1827 ++
1828 ++SQSH_EXTERN struct squashfs_fragment_cache *get_cached_fragment(struct super_block
1829 ++ *s, long long start_block,
1830 ++ int length)
1831 ++{
1832 ++ int i, n;
1833 ++ struct squashfs_sb_info *msblk = s->s_fs_info;
1834 ++ struct squashfs_super_block *sblk = &msblk->sblk;
1835 ++
1836 ++ while ( 1 ) {
1837 ++ mutex_lock(&msblk->fragment_mutex);
1838 ++
1839 ++ for (i = 0; i < SQUASHFS_CACHED_FRAGMENTS &&
1840 ++ msblk->fragment[i].block != start_block; i++);
1841 ++
1842 ++ if (i == SQUASHFS_CACHED_FRAGMENTS) {
1843 ++ for (i = msblk->next_fragment, n =
1844 ++ SQUASHFS_CACHED_FRAGMENTS; n &&
1845 ++ msblk->fragment[i].locked; n--, i = (i + 1) %
1846 ++ SQUASHFS_CACHED_FRAGMENTS);
1847 ++
1848 ++ if (n == 0) {
1849 ++ wait_queue_t wait;
1850 ++
1851 ++ init_waitqueue_entry(&wait, current);
1852 ++ add_wait_queue(&msblk->fragment_wait_queue,
1853 ++ &wait);
1854 ++ set_current_state(TASK_UNINTERRUPTIBLE);
1855 ++ mutex_unlock(&msblk->fragment_mutex);
1856 ++ schedule();
1857 ++ set_current_state(TASK_RUNNING);
1858 ++ remove_wait_queue(&msblk->fragment_wait_queue,
1859 ++ &wait);
1860 ++ continue;
1861 ++ }
1862 ++ msblk->next_fragment = (msblk->next_fragment + 1) %
1863 ++ SQUASHFS_CACHED_FRAGMENTS;
1864 ++
1865 ++ if (msblk->fragment[i].data == NULL)
1866 ++ if (!(msblk->fragment[i].data = SQUASHFS_ALLOC
1867 ++ (SQUASHFS_FILE_MAX_SIZE))) {
1868 ++ ERROR("Failed to allocate fragment "
1869 ++ "cache block\n");
1870 ++ mutex_unlock(&msblk->fragment_mutex);
1871 ++ goto out;
1872 ++ }
1873 ++
1874 ++ msblk->fragment[i].block = SQUASHFS_INVALID_BLK;
1875 ++ msblk->fragment[i].locked = 1;
1876 ++ mutex_unlock(&msblk->fragment_mutex);
1877 ++
1878 ++ if (!(msblk->fragment[i].length = squashfs_read_data(s,
1879 ++ msblk->fragment[i].data,
1880 ++ start_block, length, NULL, sblk->block_size))) {
1881 ++ ERROR("Unable to read fragment cache block "
1882 ++ "[%llx]\n", start_block);
1883 ++ msblk->fragment[i].locked = 0;
1884 ++ smp_mb();
1885 ++ goto out;
1886 ++ }
1887 ++
1888 ++ mutex_lock(&msblk->fragment_mutex);
1889 ++ msblk->fragment[i].block = start_block;
1890 ++ TRACE("New fragment %d, start block %lld, locked %d\n",
1891 ++ i, msblk->fragment[i].block,
1892 ++ msblk->fragment[i].locked);
1893 ++ mutex_unlock(&msblk->fragment_mutex);
1894 ++ break;
1895 ++ }
1896 ++
1897 ++ msblk->fragment[i].locked++;
1898 ++ mutex_unlock(&msblk->fragment_mutex);
1899 ++ TRACE("Got fragment %d, start block %lld, locked %d\n", i,
1900 ++ msblk->fragment[i].block,
1901 ++ msblk->fragment[i].locked);
1902 ++ break;
1903 ++ }
1904 ++
1905 ++ return &msblk->fragment[i];
1906 ++
1907 ++out:
1908 ++ return NULL;
1909 ++}
1910 ++
1911 ++
1912 ++static void squashfs_new_inode(struct squashfs_sb_info *msblk, struct inode *i,
1913 ++ struct squashfs_base_inode_header *inodeb)
1914 ++{
1915 ++ i->i_ino = inodeb->inode_number;
1916 ++ i->i_mtime.tv_sec = inodeb->mtime;
1917 ++ i->i_atime.tv_sec = inodeb->mtime;
1918 ++ i->i_ctime.tv_sec = inodeb->mtime;
1919 ++ i->i_uid = msblk->uid[inodeb->uid];
1920 ++ i->i_mode = inodeb->mode;
1921 ++ i->i_size = 0;
1922 ++ if (inodeb->guid == SQUASHFS_GUIDS)
1923 ++ i->i_gid = i->i_uid;
1924 ++ else
1925 ++ i->i_gid = msblk->guid[inodeb->guid];
1926 ++}
1927 ++
1928 ++
1929 ++static squashfs_inode_t squashfs_inode_lookup(struct super_block *s, int ino)
1930 ++{
1931 ++ struct squashfs_sb_info *msblk = s->s_fs_info;
1932 ++ long long start = msblk->inode_lookup_table[SQUASHFS_LOOKUP_BLOCK(ino - 1)];
1933 ++ int offset = SQUASHFS_LOOKUP_BLOCK_OFFSET(ino - 1);
1934 ++ squashfs_inode_t inode;
1935 ++
1936 ++ TRACE("Entered squashfs_inode_lookup, inode_number = %d\n", ino);
1937 ++
1938 ++ if (msblk->swap) {
1939 ++ squashfs_inode_t sinode;
1940 ++
1941 ++ if (!squashfs_get_cached_block(s, (char *) &sinode, start, offset,
1942 ++ sizeof(sinode), &start, &offset))
1943 ++ goto out;
1944 ++ SQUASHFS_SWAP_INODE_T((&inode), &sinode);
1945 ++ } else if (!squashfs_get_cached_block(s, (char *) &inode, start, offset,
1946 ++ sizeof(inode), &start, &offset))
1947 ++ goto out;
1948 ++
1949 ++ TRACE("squashfs_inode_lookup, inode = 0x%llx\n", inode);
1950 ++
1951 ++ return inode;
1952 ++
1953 ++out:
1954 ++ return SQUASHFS_INVALID_BLK;
1955 ++}
1956 ++
1957 ++
1958 ++static void vfs_read_inode(struct inode *i)
1959 ++{
1960 ++ struct squashfs_sb_info *msblk = i->i_sb->s_fs_info;
1961 ++ squashfs_inode_t inode = squashfs_inode_lookup(i->i_sb, i->i_ino);
1962 ++
1963 ++ TRACE("Entered vfs_read_inode\n");
1964 ++
1965 ++ if(inode != SQUASHFS_INVALID_BLK)
1966 ++ (msblk->read_inode)(i, inode);
1967 ++}
1968 ++
1969 ++
1970 ++static struct dentry *squashfs_get_parent(struct dentry *child)
1971 ++{
1972 ++ struct inode *i = child->d_inode;
1973 ++ struct inode *parent = iget(i->i_sb, SQUASHFS_I(i)->u.s2.parent_inode);
1974 ++ struct dentry *rv;
1975 ++
1976 ++ TRACE("Entered squashfs_get_parent\n");
1977 ++
1978 ++ if(parent == NULL) {
1979 ++ rv = ERR_PTR(-EACCES);
1980 ++ goto out;
1981 ++ }
1982 ++
1983 ++ rv = d_alloc_anon(parent);
1984 ++ if(rv == NULL)
1985 ++ rv = ERR_PTR(-ENOMEM);
1986 ++
1987 ++out:
1988 ++ return rv;
1989 ++}
1990 ++
1991 ++
1992 ++SQSH_EXTERN struct inode *squashfs_iget(struct super_block *s, squashfs_inode_t inode, unsigned int inode_number)
1993 ++{
1994 ++ struct squashfs_sb_info *msblk = s->s_fs_info;
1995 ++ struct inode *i = iget_locked(s, inode_number);
1996 ++
1997 ++ TRACE("Entered squashfs_iget\n");
1998 ++
1999 ++ if(i && (i->i_state & I_NEW)) {
2000 ++ (msblk->read_inode)(i, inode);
2001 ++ unlock_new_inode(i);
2002 ++ }
2003 ++
2004 ++ return i;
2005 ++}
2006 ++
2007 ++
2008 ++static int squashfs_read_inode(struct inode *i, squashfs_inode_t inode)
2009 ++{
2010 ++ struct super_block *s = i->i_sb;
2011 ++ struct squashfs_sb_info *msblk = s->s_fs_info;
2012 ++ struct squashfs_super_block *sblk = &msblk->sblk;
2013 ++ long long block = SQUASHFS_INODE_BLK(inode) +
2014 ++ sblk->inode_table_start;
2015 ++ unsigned int offset = SQUASHFS_INODE_OFFSET(inode);
2016 ++ long long next_block;
2017 ++ unsigned int next_offset;
2018 ++ union squashfs_inode_header id, sid;
2019 ++ struct squashfs_base_inode_header *inodeb = &id.base,
2020 ++ *sinodeb = &sid.base;
2021 ++
2022 ++ TRACE("Entered squashfs_read_inode\n");
2023 ++
2024 ++ if (msblk->swap) {
2025 ++ if (!squashfs_get_cached_block(s, (char *) sinodeb, block,
2026 ++ offset, sizeof(*sinodeb), &next_block,
2027 ++ &next_offset))
2028 ++ goto failed_read;
2029 ++ SQUASHFS_SWAP_BASE_INODE_HEADER(inodeb, sinodeb,
2030 ++ sizeof(*sinodeb));
2031 ++ } else
2032 ++ if (!squashfs_get_cached_block(s, (char *) inodeb, block,
2033 ++ offset, sizeof(*inodeb), &next_block,
2034 ++ &next_offset))
2035 ++ goto failed_read;
2036 ++
2037 ++ squashfs_new_inode(msblk, i, inodeb);
2038 ++
2039 ++ switch(inodeb->inode_type) {
2040 ++ case SQUASHFS_FILE_TYPE: {
2041 ++ unsigned int frag_size;
2042 ++ long long frag_blk;
2043 ++ struct squashfs_reg_inode_header *inodep = &id.reg;
2044 ++ struct squashfs_reg_inode_header *sinodep = &sid.reg;
2045 ++
2046 ++ if (msblk->swap) {
2047 ++ if (!squashfs_get_cached_block(s, (char *)
2048 ++ sinodep, block, offset,
2049 ++ sizeof(*sinodep), &next_block,
2050 ++ &next_offset))
2051 ++ goto failed_read;
2052 ++ SQUASHFS_SWAP_REG_INODE_HEADER(inodep, sinodep);
2053 ++ } else
2054 ++ if (!squashfs_get_cached_block(s, (char *)
2055 ++ inodep, block, offset,
2056 ++ sizeof(*inodep), &next_block,
2057 ++ &next_offset))
2058 ++ goto failed_read;
2059 ++
2060 ++ frag_blk = SQUASHFS_INVALID_BLK;
2061 ++ if (inodep->fragment != SQUASHFS_INVALID_FRAG &&
2062 ++ !get_fragment_location(s,
2063 ++ inodep->fragment, &frag_blk, &frag_size))
2064 ++ goto failed_read;
2065 ++
2066 ++ i->i_nlink = 1;
2067 ++ i->i_size = inodep->file_size;
2068 ++ i->i_fop = &generic_ro_fops;
2069 ++ i->i_mode |= S_IFREG;
2070 ++ i->i_blocks = ((i->i_size - 1) >> 9) + 1;
2071 ++ SQUASHFS_I(i)->u.s1.fragment_start_block = frag_blk;
2072 ++ SQUASHFS_I(i)->u.s1.fragment_size = frag_size;
2073 ++ SQUASHFS_I(i)->u.s1.fragment_offset = inodep->offset;
2074 ++ SQUASHFS_I(i)->start_block = inodep->start_block;
2075 ++ SQUASHFS_I(i)->u.s1.block_list_start = next_block;
2076 ++ SQUASHFS_I(i)->offset = next_offset;
2077 ++ if (sblk->block_size > 4096)
2078 ++ i->i_data.a_ops = &squashfs_aops;
2079 ++ else
2080 ++ i->i_data.a_ops = &squashfs_aops_4K;
2081 ++
2082 ++ TRACE("File inode %x:%x, start_block %llx, "
2083 ++ "block_list_start %llx, offset %x\n",
2084 ++ SQUASHFS_INODE_BLK(inode), offset,
2085 ++ inodep->start_block, next_block,
2086 ++ next_offset);
2087 ++ break;
2088 ++ }
2089 ++ case SQUASHFS_LREG_TYPE: {
2090 ++ unsigned int frag_size;
2091 ++ long long frag_blk;
2092 ++ struct squashfs_lreg_inode_header *inodep = &id.lreg;
2093 ++ struct squashfs_lreg_inode_header *sinodep = &sid.lreg;
2094 ++
2095 ++ if (msblk->swap) {
2096 ++ if (!squashfs_get_cached_block(s, (char *)
2097 ++ sinodep, block, offset,
2098 ++ sizeof(*sinodep), &next_block,
2099 ++ &next_offset))
2100 ++ goto failed_read;
2101 ++ SQUASHFS_SWAP_LREG_INODE_HEADER(inodep, sinodep);
2102 ++ } else
2103 ++ if (!squashfs_get_cached_block(s, (char *)
2104 ++ inodep, block, offset,
2105 ++ sizeof(*inodep), &next_block,
2106 ++ &next_offset))
2107 ++ goto failed_read;
2108 ++
2109 ++ frag_blk = SQUASHFS_INVALID_BLK;
2110 ++ if (inodep->fragment != SQUASHFS_INVALID_FRAG &&
2111 ++ !get_fragment_location(s,
2112 ++ inodep->fragment, &frag_blk, &frag_size))
2113 ++ goto failed_read;
2114 ++
2115 ++ i->i_nlink = inodep->nlink;
2116 ++ i->i_size = inodep->file_size;
2117 ++ i->i_fop = &generic_ro_fops;
2118 ++ i->i_mode |= S_IFREG;
2119 ++ i->i_blocks = ((i->i_size - 1) >> 9) + 1;
2120 ++ SQUASHFS_I(i)->u.s1.fragment_start_block = frag_blk;
2121 ++ SQUASHFS_I(i)->u.s1.fragment_size = frag_size;
2122 ++ SQUASHFS_I(i)->u.s1.fragment_offset = inodep->offset;
2123 ++ SQUASHFS_I(i)->start_block = inodep->start_block;
2124 ++ SQUASHFS_I(i)->u.s1.block_list_start = next_block;
2125 ++ SQUASHFS_I(i)->offset = next_offset;
2126 ++ if (sblk->block_size > 4096)
2127 ++ i->i_data.a_ops = &squashfs_aops;
2128 ++ else
2129 ++ i->i_data.a_ops = &squashfs_aops_4K;
2130 ++
2131 ++ TRACE("File inode %x:%x, start_block %llx, "
2132 ++ "block_list_start %llx, offset %x\n",
2133 ++ SQUASHFS_INODE_BLK(inode), offset,
2134 ++ inodep->start_block, next_block,
2135 ++ next_offset);
2136 ++ break;
2137 ++ }
2138 ++ case SQUASHFS_DIR_TYPE: {
2139 ++ struct squashfs_dir_inode_header *inodep = &id.dir;
2140 ++ struct squashfs_dir_inode_header *sinodep = &sid.dir;
2141 ++
2142 ++ if (msblk->swap) {
2143 ++ if (!squashfs_get_cached_block(s, (char *)
2144 ++ sinodep, block, offset,
2145 ++ sizeof(*sinodep), &next_block,
2146 ++ &next_offset))
2147 ++ goto failed_read;
2148 ++ SQUASHFS_SWAP_DIR_INODE_HEADER(inodep, sinodep);
2149 ++ } else
2150 ++ if (!squashfs_get_cached_block(s, (char *)
2151 ++ inodep, block, offset,
2152 ++ sizeof(*inodep), &next_block,
2153 ++ &next_offset))
2154 ++ goto failed_read;
2155 ++
2156 ++ i->i_nlink = inodep->nlink;
2157 ++ i->i_size = inodep->file_size;
2158 ++ i->i_op = &squashfs_dir_inode_ops;
2159 ++ i->i_fop = &squashfs_dir_ops;
2160 ++ i->i_mode |= S_IFDIR;
2161 ++ SQUASHFS_I(i)->start_block = inodep->start_block;
2162 ++ SQUASHFS_I(i)->offset = inodep->offset;
2163 ++ SQUASHFS_I(i)->u.s2.directory_index_count = 0;
2164 ++ SQUASHFS_I(i)->u.s2.parent_inode = inodep->parent_inode;
2165 ++
2166 ++ TRACE("Directory inode %x:%x, start_block %x, offset "
2167 ++ "%x\n", SQUASHFS_INODE_BLK(inode),
2168 ++ offset, inodep->start_block,
2169 ++ inodep->offset);
2170 ++ break;
2171 ++ }
2172 ++ case SQUASHFS_LDIR_TYPE: {
2173 ++ struct squashfs_ldir_inode_header *inodep = &id.ldir;
2174 ++ struct squashfs_ldir_inode_header *sinodep = &sid.ldir;
2175 ++
2176 ++ if (msblk->swap) {
2177 ++ if (!squashfs_get_cached_block(s, (char *)
2178 ++ sinodep, block, offset,
2179 ++ sizeof(*sinodep), &next_block,
2180 ++ &next_offset))
2181 ++ goto failed_read;
2182 ++ SQUASHFS_SWAP_LDIR_INODE_HEADER(inodep,
2183 ++ sinodep);
2184 ++ } else
2185 ++ if (!squashfs_get_cached_block(s, (char *)
2186 ++ inodep, block, offset,
2187 ++ sizeof(*inodep), &next_block,
2188 ++ &next_offset))
2189 ++ goto failed_read;
2190 ++
2191 ++ i->i_nlink = inodep->nlink;
2192 ++ i->i_size = inodep->file_size;
2193 ++ i->i_op = &squashfs_dir_inode_ops;
2194 ++ i->i_fop = &squashfs_dir_ops;
2195 ++ i->i_mode |= S_IFDIR;
2196 ++ SQUASHFS_I(i)->start_block = inodep->start_block;
2197 ++ SQUASHFS_I(i)->offset = inodep->offset;
2198 ++ SQUASHFS_I(i)->u.s2.directory_index_start = next_block;
2199 ++ SQUASHFS_I(i)->u.s2.directory_index_offset =
2200 ++ next_offset;
2201 ++ SQUASHFS_I(i)->u.s2.directory_index_count =
2202 ++ inodep->i_count;
2203 ++ SQUASHFS_I(i)->u.s2.parent_inode = inodep->parent_inode;
2204 ++
2205 ++ TRACE("Long directory inode %x:%x, start_block %x, "
2206 ++ "offset %x\n",
2207 ++ SQUASHFS_INODE_BLK(inode), offset,
2208 ++ inodep->start_block, inodep->offset);
2209 ++ break;
2210 ++ }
2211 ++ case SQUASHFS_SYMLINK_TYPE: {
2212 ++ struct squashfs_symlink_inode_header *inodep =
2213 ++ &id.symlink;
2214 ++ struct squashfs_symlink_inode_header *sinodep =
2215 ++ &sid.symlink;
2216 ++
2217 ++ if (msblk->swap) {
2218 ++ if (!squashfs_get_cached_block(s, (char *)
2219 ++ sinodep, block, offset,
2220 ++ sizeof(*sinodep), &next_block,
2221 ++ &next_offset))
2222 ++ goto failed_read;
2223 ++ SQUASHFS_SWAP_SYMLINK_INODE_HEADER(inodep,
2224 ++ sinodep);
2225 ++ } else
2226 ++ if (!squashfs_get_cached_block(s, (char *)
2227 ++ inodep, block, offset,
2228 ++ sizeof(*inodep), &next_block,
2229 ++ &next_offset))
2230 ++ goto failed_read;
2231 ++
2232 ++ i->i_nlink = inodep->nlink;
2233 ++ i->i_size = inodep->symlink_size;
2234 ++ i->i_op = &page_symlink_inode_operations;
2235 ++ i->i_data.a_ops = &squashfs_symlink_aops;
2236 ++ i->i_mode |= S_IFLNK;
2237 ++ SQUASHFS_I(i)->start_block = next_block;
2238 ++ SQUASHFS_I(i)->offset = next_offset;
2239 ++
2240 ++ TRACE("Symbolic link inode %x:%x, start_block %llx, "
2241 ++ "offset %x\n",
2242 ++ SQUASHFS_INODE_BLK(inode), offset,
2243 ++ next_block, next_offset);
2244 ++ break;
2245 ++ }
2246 ++ case SQUASHFS_BLKDEV_TYPE:
2247 ++ case SQUASHFS_CHRDEV_TYPE: {
2248 ++ struct squashfs_dev_inode_header *inodep = &id.dev;
2249 ++ struct squashfs_dev_inode_header *sinodep = &sid.dev;
2250 ++
2251 ++ if (msblk->swap) {
2252 ++ if (!squashfs_get_cached_block(s, (char *)
2253 ++ sinodep, block, offset,
2254 ++ sizeof(*sinodep), &next_block,
2255 ++ &next_offset))
2256 ++ goto failed_read;
2257 ++ SQUASHFS_SWAP_DEV_INODE_HEADER(inodep, sinodep);
2258 ++ } else
2259 ++ if (!squashfs_get_cached_block(s, (char *)
2260 ++ inodep, block, offset,
2261 ++ sizeof(*inodep), &next_block,
2262 ++ &next_offset))
2263 ++ goto failed_read;
2264 ++
2265 ++ i->i_nlink = inodep->nlink;
2266 ++ i->i_mode |= (inodeb->inode_type ==
2267 ++ SQUASHFS_CHRDEV_TYPE) ? S_IFCHR :
2268 ++ S_IFBLK;
2269 ++ init_special_inode(i, i->i_mode,
2270 ++ old_decode_dev(inodep->rdev));
2271 ++
2272 ++ TRACE("Device inode %x:%x, rdev %x\n",
2273 ++ SQUASHFS_INODE_BLK(inode), offset,
2274 ++ inodep->rdev);
2275 ++ break;
2276 ++ }
2277 ++ case SQUASHFS_FIFO_TYPE:
2278 ++ case SQUASHFS_SOCKET_TYPE: {
2279 ++ struct squashfs_ipc_inode_header *inodep = &id.ipc;
2280 ++ struct squashfs_ipc_inode_header *sinodep = &sid.ipc;
2281 ++
2282 ++ if (msblk->swap) {
2283 ++ if (!squashfs_get_cached_block(s, (char *)
2284 ++ sinodep, block, offset,
2285 ++ sizeof(*sinodep), &next_block,
2286 ++ &next_offset))
2287 ++ goto failed_read;
2288 ++ SQUASHFS_SWAP_IPC_INODE_HEADER(inodep, sinodep);
2289 ++ } else
2290 ++ if (!squashfs_get_cached_block(s, (char *)
2291 ++ inodep, block, offset,
2292 ++ sizeof(*inodep), &next_block,
2293 ++ &next_offset))
2294 ++ goto failed_read;
2295 ++
2296 ++ i->i_nlink = inodep->nlink;
2297 ++ i->i_mode |= (inodeb->inode_type == SQUASHFS_FIFO_TYPE)
2298 ++ ? S_IFIFO : S_IFSOCK;
2299 ++ init_special_inode(i, i->i_mode, 0);
2300 ++ break;
2301 ++ }
2302 ++ default:
2303 ++ ERROR("Unknown inode type %d in squashfs_iget!\n",
2304 ++ inodeb->inode_type);
2305 ++ goto failed_read1;
2306 ++ }
2307 ++
2308 ++ return 1;
2309 ++
2310 ++failed_read:
2311 ++ ERROR("Unable to read inode [%llx:%x]\n", block, offset);
2312 ++
2313 ++failed_read1:
2314 ++ make_bad_inode(i);
2315 ++ return 0;
2316 ++}
2317 ++
2318 ++
2319 ++static int read_inode_lookup_table(struct super_block *s)
2320 ++{
2321 ++ struct squashfs_sb_info *msblk = s->s_fs_info;
2322 ++ struct squashfs_super_block *sblk = &msblk->sblk;
2323 ++ unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(sblk->inodes);
2324 ++
2325 ++ TRACE("In read_inode_lookup_table, length %d\n", length);
2326 ++
2327 ++ /* Allocate inode lookup table */
2328 ++ if (!(msblk->inode_lookup_table = kmalloc(length, GFP_KERNEL))) {
2329 ++ ERROR("Failed to allocate inode lookup table\n");
2330 ++ return 0;
2331 ++ }
2332 ++
2333 ++ if (!squashfs_read_data(s, (char *) msblk->inode_lookup_table,
2334 ++ sblk->lookup_table_start, length |
2335 ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length)) {
2336 ++ ERROR("unable to read inode lookup table\n");
2337 ++ return 0;
2338 ++ }
2339 ++
2340 ++ if (msblk->swap) {
2341 ++ int i;
2342 ++ long long block;
2343 ++
2344 ++ for (i = 0; i < SQUASHFS_LOOKUP_BLOCKS(sblk->inodes); i++) {
2345 ++ SQUASHFS_SWAP_LOOKUP_BLOCKS((&block),
2346 ++ &msblk->inode_lookup_table[i], 1);
2347 ++ msblk->inode_lookup_table[i] = block;
2348 ++ }
2349 ++ }
2350 ++
2351 ++ return 1;
2352 ++}
2353 ++
2354 ++
2355 ++static int read_fragment_index_table(struct super_block *s)
2356 ++{
2357 ++ struct squashfs_sb_info *msblk = s->s_fs_info;
2358 ++ struct squashfs_super_block *sblk = &msblk->sblk;
2359 ++ unsigned int length = SQUASHFS_FRAGMENT_INDEX_BYTES(sblk->fragments);
2360 ++
2361 ++ if(length == 0)
2362 ++ return 1;
2363 ++
2364 ++ /* Allocate fragment index table */
2365 ++ if (!(msblk->fragment_index = kmalloc(length, GFP_KERNEL))) {
2366 ++ ERROR("Failed to allocate fragment index table\n");
2367 ++ return 0;
2368 ++ }
2369 ++
2370 ++ if (!squashfs_read_data(s, (char *) msblk->fragment_index,
2371 ++ sblk->fragment_table_start, length |
2372 ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length)) {
2373 ++ ERROR("unable to read fragment index table\n");
2374 ++ return 0;
2375 ++ }
2376 ++
2377 ++ if (msblk->swap) {
2378 ++ int i;
2379 ++ long long fragment;
2380 ++
2381 ++ for (i = 0; i < SQUASHFS_FRAGMENT_INDEXES(sblk->fragments); i++) {
2382 ++ SQUASHFS_SWAP_FRAGMENT_INDEXES((&fragment),
2383 ++ &msblk->fragment_index[i], 1);
2384 ++ msblk->fragment_index[i] = fragment;
2385 ++ }
2386 ++ }
2387 ++
2388 ++ return 1;
2389 ++}
2390 ++
2391 ++
2392 ++static int supported_squashfs_filesystem(struct squashfs_sb_info *msblk, int silent)
2393 ++{
2394 ++ struct squashfs_super_block *sblk = &msblk->sblk;
2395 ++
2396 ++ msblk->read_inode = squashfs_read_inode;
2397 ++ msblk->read_blocklist = read_blocklist;
2398 ++ msblk->read_fragment_index_table = read_fragment_index_table;
2399 ++
2400 ++ if (sblk->s_major == 1) {
2401 ++ if (!squashfs_1_0_supported(msblk)) {
2402 ++ SERROR("Major/Minor mismatch, Squashfs 1.0 filesystems "
2403 ++ "are unsupported\n");
2404 ++ SERROR("Please recompile with "
2405 ++ "Squashfs 1.0 support enabled\n");
2406 ++ return 0;
2407 ++ }
2408 ++ } else if (sblk->s_major == 2) {
2409 ++ if (!squashfs_2_0_supported(msblk)) {
2410 ++ SERROR("Major/Minor mismatch, Squashfs 2.0 filesystems "
2411 ++ "are unsupported\n");
2412 ++ SERROR("Please recompile with "
2413 ++ "Squashfs 2.0 support enabled\n");
2414 ++ return 0;
2415 ++ }
2416 ++ } else if(sblk->s_major != SQUASHFS_MAJOR || sblk->s_minor >
2417 ++ SQUASHFS_MINOR) {
2418 ++ SERROR("Major/Minor mismatch, trying to mount newer %d.%d "
2419 ++ "filesystem\n", sblk->s_major, sblk->s_minor);
2420 ++ SERROR("Please update your kernel\n");
2421 ++ return 0;
2422 ++ }
2423 ++
2424 ++ return 1;
2425 ++}
2426 ++
2427 ++
2428 ++static int squashfs_fill_super(struct super_block *s, void *data, int silent)
2429 ++{
2430 ++ struct squashfs_sb_info *msblk;
2431 ++ struct squashfs_super_block *sblk;
2432 ++ int i;
2433 ++ char b[BDEVNAME_SIZE];
2434 ++ struct inode *root;
2435 ++
2436 ++ TRACE("Entered squashfs_read_superblock\n");
2437 ++
2438 ++ if (!(s->s_fs_info = kmalloc(sizeof(struct squashfs_sb_info),
2439 ++ GFP_KERNEL))) {
2440 ++ ERROR("Failed to allocate superblock\n");
2441 ++ goto failure;
2442 ++ }
2443 ++ memset(s->s_fs_info, 0, sizeof(struct squashfs_sb_info));
2444 ++ msblk = s->s_fs_info;
2445 ++ if (!(msblk->stream.workspace = vmalloc(zlib_inflate_workspacesize()))) {
2446 ++ ERROR("Failed to allocate zlib workspace\n");
2447 ++ goto failure;
2448 ++ }
2449 ++ sblk = &msblk->sblk;
2450 ++
2451 ++ msblk->devblksize = sb_min_blocksize(s, BLOCK_SIZE);
2452 ++ msblk->devblksize_log2 = ffz(~msblk->devblksize);
2453 ++
2454 ++ mutex_init(&msblk->read_data_mutex);
2455 ++ mutex_init(&msblk->read_page_mutex);
2456 ++ mutex_init(&msblk->block_cache_mutex);
2457 ++ mutex_init(&msblk->fragment_mutex);
2458 ++ mutex_init(&msblk->meta_index_mutex);
2459 ++
2460 ++ init_waitqueue_head(&msblk->waitq);
2461 ++ init_waitqueue_head(&msblk->fragment_wait_queue);
2462 ++
2463 ++ sblk->bytes_used = sizeof(struct squashfs_super_block);
2464 ++ if (!squashfs_read_data(s, (char *) sblk, SQUASHFS_START,
2465 ++ sizeof(struct squashfs_super_block) |
2466 ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, sizeof(struct squashfs_super_block))) {
2467 ++ SERROR("unable to read superblock\n");
2468 ++ goto failed_mount;
2469 ++ }
2470 ++
2471 ++ /* Check it is a SQUASHFS superblock */
2472 ++ msblk->swap = 0;
2473 ++ if ((s->s_magic = sblk->s_magic) != SQUASHFS_MAGIC) {
2474 ++ if (sblk->s_magic == SQUASHFS_MAGIC_SWAP) {
2475 ++ struct squashfs_super_block ssblk;
2476 ++
2477 ++ WARNING("Mounting a different endian SQUASHFS "
2478 ++ "filesystem on %s\n", bdevname(s->s_bdev, b));
2479 ++
2480 ++ SQUASHFS_SWAP_SUPER_BLOCK(&ssblk, sblk);
2481 ++ memcpy(sblk, &ssblk, sizeof(struct squashfs_super_block));
2482 ++ msblk->swap = 1;
2483 ++ } else {
2484 ++ SERROR("Can't find a SQUASHFS superblock on %s\n",
2485 ++ bdevname(s->s_bdev, b));
2486 ++ goto failed_mount;
2487 ++ }
2488 ++ }
2489 ++
2490 ++ /* Check the MAJOR & MINOR versions */
2491 ++ if(!supported_squashfs_filesystem(msblk, silent))
2492 ++ goto failed_mount;
2493 ++
2494 ++ /* Check the filesystem does not extend beyond the end of the
2495 ++ block device */
2496 ++ if(sblk->bytes_used < 0 || sblk->bytes_used > i_size_read(s->s_bdev->bd_inode))
2497 ++ goto failed_mount;
2498 ++
2499 ++ /* Check the root inode for sanity */
2500 ++ if (SQUASHFS_INODE_OFFSET(sblk->root_inode) > SQUASHFS_METADATA_SIZE)
2501 ++ goto failed_mount;
2502 ++
2503 ++ TRACE("Found valid superblock on %s\n", bdevname(s->s_bdev, b));
2504 ++ TRACE("Inodes are %scompressed\n",
2505 ++ SQUASHFS_UNCOMPRESSED_INODES
2506 ++ (sblk->flags) ? "un" : "");
2507 ++ TRACE("Data is %scompressed\n",
2508 ++ SQUASHFS_UNCOMPRESSED_DATA(sblk->flags)
2509 ++ ? "un" : "");
2510 ++ TRACE("Check data is %s present in the filesystem\n",
2511 ++ SQUASHFS_CHECK_DATA(sblk->flags) ?
2512 ++ "" : "not");
2513 ++ TRACE("Filesystem size %lld bytes\n", sblk->bytes_used);
2514 ++ TRACE("Block size %d\n", sblk->block_size);
2515 ++ TRACE("Number of inodes %d\n", sblk->inodes);
2516 ++ if (sblk->s_major > 1)
2517 ++ TRACE("Number of fragments %d\n", sblk->fragments);
2518 ++ TRACE("Number of uids %d\n", sblk->no_uids);
2519 ++ TRACE("Number of gids %d\n", sblk->no_guids);
2520 ++ TRACE("sblk->inode_table_start %llx\n", sblk->inode_table_start);
2521 ++ TRACE("sblk->directory_table_start %llx\n", sblk->directory_table_start);
2522 ++ if (sblk->s_major > 1)
2523 ++ TRACE("sblk->fragment_table_start %llx\n",
2524 ++ sblk->fragment_table_start);
2525 ++ TRACE("sblk->uid_start %llx\n", sblk->uid_start);
2526 ++
2527 ++ s->s_flags |= MS_RDONLY;
2528 ++ s->s_op = &squashfs_super_ops;
2529 ++
2530 ++ /* Init inode_table block pointer array */
2531 ++ if (!(msblk->block_cache = kmalloc(sizeof(struct squashfs_cache) *
2532 ++ SQUASHFS_CACHED_BLKS, GFP_KERNEL))) {
2533 ++ ERROR("Failed to allocate block cache\n");
2534 ++ goto failed_mount;
2535 ++ }
2536 ++
2537 ++ for (i = 0; i < SQUASHFS_CACHED_BLKS; i++)
2538 ++ msblk->block_cache[i].block = SQUASHFS_INVALID_BLK;
2539 ++
2540 ++ msblk->next_cache = 0;
2541 ++
2542 ++ /* Allocate read_page block */
2543 ++ if (!(msblk->read_page = kmalloc(sblk->block_size, GFP_KERNEL))) {
2544 ++ ERROR("Failed to allocate read_page block\n");
2545 ++ goto failed_mount;
2546 ++ }
2547 ++
2548 ++ /* Allocate uid and gid tables */
2549 ++ if (!(msblk->uid = kmalloc((sblk->no_uids + sblk->no_guids) *
2550 ++ sizeof(unsigned int), GFP_KERNEL))) {
2551 ++ ERROR("Failed to allocate uid/gid table\n");
2552 ++ goto failed_mount;
2553 ++ }
2554 ++ msblk->guid = msblk->uid + sblk->no_uids;
2555 ++
2556 ++ if (msblk->swap) {
2557 ++ unsigned int suid[sblk->no_uids + sblk->no_guids];
2558 ++
2559 ++ if (!squashfs_read_data(s, (char *) &suid, sblk->uid_start,
2560 ++ ((sblk->no_uids + sblk->no_guids) *
2561 ++ sizeof(unsigned int)) |
2562 ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, (sblk->no_uids + sblk->no_guids) * sizeof(unsigned int))) {
2563 ++ ERROR("unable to read uid/gid table\n");
2564 ++ goto failed_mount;
2565 ++ }
2566 ++
2567 ++ SQUASHFS_SWAP_DATA(msblk->uid, suid, (sblk->no_uids +
2568 ++ sblk->no_guids), (sizeof(unsigned int) * 8));
2569 ++ } else
2570 ++ if (!squashfs_read_data(s, (char *) msblk->uid, sblk->uid_start,
2571 ++ ((sblk->no_uids + sblk->no_guids) *
2572 ++ sizeof(unsigned int)) |
2573 ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, (sblk->no_uids + sblk->no_guids) * sizeof(unsigned int))) {
2574 ++ ERROR("unable to read uid/gid table\n");
2575 ++ goto failed_mount;
2576 ++ }