[PATCH v2] ext4fs: Add ext4 extent cache for read operations

classic Classic list List threaded Threaded
5 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH v2] ext4fs: Add ext4 extent cache for read operations

Ionut Nicu

In an ext4 filesystem, the inode corresponding to a file has a 60-byte
area which contains an extent header structure and up to 4 extent
structures (5 x 12 bytes).

For files that need more than 4 extents to be represented (either files
larger than 4 x 128MB = 512MB or smaller files but very fragmented),
ext4 creates extent index structures. Each extent index points to a 4KB
physical block where one extent header and additional 340 extents could
be stored.

The current u-boot ext4 code is very inefficient when it tries to load a
file which has extent indexes. For each logical file block the code will
read over and over again the same blocks of 4096 bytes from the disk.

Since the extent tree in a file is always the same, we can cache the
extent structures in memory before actually starting to read the file.

This patch creates a simple linked list of structures holding information
about all the extents used to represent a file. The list is sorted by
the logical block number (ee_block) so that we can easily find the
proper extent information for any file block.

Without this patch, a 69MB file which had just one extent index pointing
to a block with another 6 extents was read in approximately 3 minutes.
With this patch applied the same file can be read in almost 20 seconds.

Signed-off-by: Ionut Nicu <[hidden email]>
---

Changes since v1:
 * build the extent cache in ext4fs_read_file instead of ext4fs_read
 * fixed extent start address calculation


 fs/ext4/ext4_common.c |  164 +++++++++++++++++++++++++++++--------------------
 fs/ext4/ext4_common.h |    3 +
 fs/ext4/ext4fs.c      |   36 ++++++++---
 3 files changed, 130 insertions(+), 73 deletions(-)

diff --git a/fs/ext4/ext4_common.c b/fs/ext4/ext4_common.c
index 02da75c..6584892 100644
--- a/fs/ext4/ext4_common.c
+++ b/fs/ext4/ext4_common.c
@@ -26,6 +26,7 @@
 #include <stddef.h>
 #include <linux/stat.h>
 #include <linux/time.h>
+#include <linux/list.h>
 #include <asm/byteorder.h>
 #include "ext4_common.h"
 
@@ -44,6 +45,14 @@ int ext4fs_indir3_blkno = -1;
 struct ext2_inode *g_parent_inode;
 static int symlinknest;
 
+struct ext4_extent_node {
+ uint32_t block;
+ uint16_t len;
+ uint64_t start;
+ struct list_head lh;
+};
+static LIST_HEAD(ext4_extent_lh);
+
 #if defined(CONFIG_EXT4_WRITE)
 uint32_t ext4fs_div_roundup(uint32_t size, uint32_t n)
 {
@@ -1407,45 +1416,102 @@ void ext4fs_allocate_blocks(struct ext2_inode *file_inode,
 
 #endif
 
-static struct ext4_extent_header *ext4fs_get_extent_block
- (struct ext2_data *data, char *buf,
- struct ext4_extent_header *ext_block,
- uint32_t fileblock, int log2_blksz)
+static void ext4fs_extent_cache_insert(struct ext4_extent_node *new)
+{
+ struct ext4_extent_node *node;
+
+ list_for_each_entry(node, &ext4_extent_lh, lh)
+ if (node->block > new->block) {
+ list_add_tail(&new->lh, &node->lh);
+ return;
+ }
+ list_add_tail(&new->lh, &ext4_extent_lh);
+}
+
+static int __ext4fs_build_extent_cache(struct ext2_data *data,
+ struct ext4_extent_header *ext_block)
 {
+ int blksz = EXT2_BLOCK_SIZE(data);
+ int log2_blksz = LOG2_BLOCK_SIZE(data)
+ - get_fs()->dev_desc->log2blksz;
+ struct ext4_extent_node *node;
  struct ext4_extent_idx *index;
+ struct ext4_extent *extent;
  unsigned long long block;
- int blksz = EXT2_BLOCK_SIZE(data);
- int i;
+ char *buf;
+ int i, err;
 
- while (1) {
- index = (struct ext4_extent_idx *)(ext_block + 1);
+ if (le16_to_cpu(ext_block->eh_magic) != EXT4_EXT_MAGIC)
+ return -EINVAL;
 
- if (le16_to_cpu(ext_block->eh_magic) != EXT4_EXT_MAGIC)
- return 0;
-
- if (ext_block->eh_depth == 0)
- return ext_block;
- i = -1;
- do {
- i++;
- if (i >= le16_to_cpu(ext_block->eh_entries))
- break;
- } while (fileblock >= le32_to_cpu(index[i].ei_block));
+ if (ext_block->eh_depth == 0) {
+ extent = (struct ext4_extent *)(ext_block + 1);
+ for (i = 0; i < le16_to_cpu(ext_block->eh_entries); i++) {
+ node = malloc(sizeof(*node));
+ if (!node)
+ return -ENOMEM;
+ node->block = le32_to_cpu(extent[i].ee_block);
+ node->len = le16_to_cpu(extent[i].ee_len);
+ node->start = le16_to_cpu(extent[i].ee_start_hi);
+ node->start = (node->start << 32) +
+ le32_to_cpu(extent[i].ee_start_lo);
+ ext4fs_extent_cache_insert(node);
+ }
+ return 0;
+ }
 
- if (--i < 0)
- return 0;
+ index = (struct ext4_extent_idx *)(ext_block + 1);
+ for (i = 0; i < le16_to_cpu(ext_block->eh_entries); i++) {
+ buf = malloc(blksz);
+ if (!buf)
+ return -ENOMEM;
 
  block = le16_to_cpu(index[i].ei_leaf_hi);
  block = (block << 32) + le32_to_cpu(index[i].ei_leaf_lo);
 
- if (ext4fs_devread((lbaint_t)block << log2_blksz, 0, blksz,
-   buf))
- ext_block = (struct ext4_extent_header *)buf;
- else
- return 0;
+ if (!ext4fs_devread(block << log2_blksz, 0, blksz, buf)) {
+ free(buf);
+ return -EIO;
+ }
+
+ err = __ext4fs_build_extent_cache(data,
+ (struct ext4_extent_header *) buf);
+ free(buf);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
+int ext4fs_build_extent_cache(struct ext2_inode *inode)
+{
+ return __ext4fs_build_extent_cache(ext4fs_root,
+ (struct ext4_extent_header *)
+ inode->b.blocks.dir_blocks);
+}
+
+void ext4fs_free_extent_cache(void)
+{
+ struct ext4_extent_node *node, *tmp;
+
+ list_for_each_entry_safe(node, tmp, &ext4_extent_lh, lh) {
+ list_del(&node->lh);
+ free(node);
  }
 }
 
+static struct ext4_extent_node *ext4fs_extent_cache_get(uint32_t block)
+{
+ struct ext4_extent_node *node;
+
+ list_for_each_entry(node, &ext4_extent_lh, lh)
+ if (block >= node->block && block < node->block + node->len)
+ return node;
+
+ return NULL;
+}
+
 static int ext4fs_blockgroup
  (struct ext2_data *data, int group, struct ext2_block_group *blkgrp)
 {
@@ -1508,54 +1574,22 @@ long int read_allocated_block(struct ext2_inode *inode, int fileblock)
  long int rblock;
  long int perblock_parent;
  long int perblock_child;
- unsigned long long start;
+
  /* get the blocksize of the filesystem */
  blksz = EXT2_BLOCK_SIZE(ext4fs_root);
  log2_blksz = LOG2_BLOCK_SIZE(ext4fs_root)
  - get_fs()->dev_desc->log2blksz;
 
  if (le32_to_cpu(inode->flags) & EXT4_EXTENTS_FL) {
- char *buf = zalloc(blksz);
- if (!buf)
- return -ENOMEM;
- struct ext4_extent_header *ext_block;
- struct ext4_extent *extent;
- int i = -1;
- ext_block =
- ext4fs_get_extent_block(ext4fs_root, buf,
- (struct ext4_extent_header *)
- inode->b.blocks.dir_blocks,
- fileblock, log2_blksz);
- if (!ext_block) {
- printf("invalid extent block\n");
- free(buf);
- return -EINVAL;
- }
-
- extent = (struct ext4_extent *)(ext_block + 1);
-
- do {
- i++;
- if (i >= le16_to_cpu(ext_block->eh_entries))
- break;
- } while (fileblock >= le32_to_cpu(extent[i].ee_block));
- if (--i >= 0) {
- fileblock -= le32_to_cpu(extent[i].ee_block);
- if (fileblock >= le16_to_cpu(extent[i].ee_len)) {
- free(buf);
- return 0;
- }
+ struct ext4_extent_node *node;
 
- start = le16_to_cpu(extent[i].ee_start_hi);
- start = (start << 32) +
- le32_to_cpu(extent[i].ee_start_lo);
- free(buf);
- return fileblock + start;
+ node = ext4fs_extent_cache_get(fileblock);
+ if (!node) {
+ printf("Extent Error\n");
+ return -1;
  }
 
- printf("Extent Error\n");
- free(buf);
- return -1;
+ return fileblock - node->block + node->start;
  }
 
  /* Direct blocks. */
diff --git a/fs/ext4/ext4_common.h b/fs/ext4/ext4_common.h
index 5fa1719..a9fd8c6 100644
--- a/fs/ext4/ext4_common.h
+++ b/fs/ext4/ext4_common.h
@@ -57,6 +57,9 @@ int ext4fs_find_file(const char *path, struct ext2fs_node *rootnode,
 int ext4fs_iterate_dir(struct ext2fs_node *dir, char *name,
  struct ext2fs_node **fnode, int *ftype);
 
+int ext4fs_build_extent_cache(struct ext2_inode *inode);
+void ext4fs_free_extent_cache(void);
+
 #if defined(CONFIG_EXT4_WRITE)
 uint32_t ext4fs_div_roundup(uint32_t size, uint32_t n);
 int ext4fs_checksum_update(unsigned int i);
diff --git a/fs/ext4/ext4fs.c b/fs/ext4/ext4fs.c
index 735b256..d34bc88 100644
--- a/fs/ext4/ext4fs.c
+++ b/fs/ext4/ext4fs.c
@@ -63,6 +63,14 @@ int ext4fs_read_file(struct ext2fs_node *node, int pos,
  char *delayed_buf = NULL;
  short status;
 
+ if (le32_to_cpu(node->inode.flags) & EXT4_EXTENTS_FL) {
+ if (ext4fs_build_extent_cache(&node->inode)) {
+ printf("Error building extent cache!\n");
+ len = -1;
+ goto out_exit;
+ }
+ }
+
  /* Adjust len so it we can't read past the end of the file. */
  if (len > filesize)
  len = filesize;
@@ -75,8 +83,10 @@ int ext4fs_read_file(struct ext2fs_node *node, int pos,
  int blockend = blocksize;
  int skipfirst = 0;
  blknr = read_allocated_block(&(node->inode), i);
- if (blknr < 0)
- return -1;
+ if (blknr < 0) {
+ len = -1;
+ goto out_exit;
+ }
 
  blknr = blknr << log2_fs_blocksize;
 
@@ -106,8 +116,10 @@ int ext4fs_read_file(struct ext2fs_node *node, int pos,
  delayed_skipfirst,
  delayed_extent,
  delayed_buf);
- if (status == 0)
- return -1;
+ if (status == 0) {
+ len = -1;
+ goto out_exit;
+ }
  previous_block_number = blknr;
  delayed_start = blknr;
  delayed_extent = blockend;
@@ -132,8 +144,10 @@ int ext4fs_read_file(struct ext2fs_node *node, int pos,
  delayed_skipfirst,
  delayed_extent,
  delayed_buf);
- if (status == 0)
- return -1;
+ if (status == 0) {
+ len = -1;
+ goto out_exit;
+ }
  previous_block_number = -1;
  }
  memset(buf, 0, blocksize - skipfirst);
@@ -145,11 +159,17 @@ int ext4fs_read_file(struct ext2fs_node *node, int pos,
  status = ext4fs_devread(delayed_start,
  delayed_skipfirst, delayed_extent,
  delayed_buf);
- if (status == 0)
- return -1;
+ if (status == 0) {
+ len = -1;
+ goto out_exit;
+ }
  previous_block_number = -1;
  }
 
+
+out_exit:
+ ext4fs_free_extent_cache();
+
  return len;
 }
 
--
1.7.9.5
_______________________________________________
U-Boot mailing list
[hidden email]
http://lists.denx.de/mailman/listinfo/u-boot
Reply | Threaded
Open this post in threaded view
|

Re: [U-Boot, v2] ext4fs: Add ext4 extent cache for read operations

Tom Rini
On Tue, Feb 04, 2014 at 03:48:10PM +0100, Ionut Nicu wrote:

> In an ext4 filesystem, the inode corresponding to a file has a 60-byte
> area which contains an extent header structure and up to 4 extent
> structures (5 x 12 bytes).
>
> For files that need more than 4 extents to be represented (either files
> larger than 4 x 128MB = 512MB or smaller files but very fragmented),
> ext4 creates extent index structures. Each extent index points to a 4KB
> physical block where one extent header and additional 340 extents could
> be stored.
>
> The current u-boot ext4 code is very inefficient when it tries to load a
> file which has extent indexes. For each logical file block the code will
> read over and over again the same blocks of 4096 bytes from the disk.
>
> Since the extent tree in a file is always the same, we can cache the
> extent structures in memory before actually starting to read the file.
>
> This patch creates a simple linked list of structures holding information
> about all the extents used to represent a file. The list is sorted by
> the logical block number (ee_block) so that we can easily find the
> proper extent information for any file block.
>
> Without this patch, a 69MB file which had just one extent index pointing
> to a block with another 6 extents was read in approximately 3 minutes.
> With this patch applied the same file can be read in almost 20 seconds.
>
> Signed-off-by: Ionut Nicu <[hidden email]>
Applied to u-boot/master, thanks!

--
Tom

_______________________________________________
U-Boot mailing list
[hidden email]
http://lists.denx.de/mailman/listinfo/u-boot

signature.asc (853 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|

[Regression][ext4] Regression at EXT4 filesystem code

Lukasz Majewski
In reply to this post by Ionut Nicu
Dear All,

In the newest master u-boot
(SHA1: 1674df60d17e0e72396c961d5390bb62b184ad95)

I've found a regression with writing to EXT4 file system:

Writing uImage to ext4 partition - created with Debian's
mkfs.ext4 /dev/sdd2 (size 58M)

dfu-util -R -a2 -D arch/arm/boot/uImage
uImage size - 4.8 MiB

GADGET DRIVER: usb_dnl_dfu
USB PHY0 Enable
#Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
File System is consistent
Extent Error
part_offset is 26624
total_sector is 131072
error: overflow occurs
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
ext4fs_devread read outside partition 4294967294
Extent Error
Extent Error
Extent Error
Extent Error
Extent Error
Extent Error
Extent Error
Extent Error
Extent Error
Extent Error
Extent Error
Extent Error
Extent Error
Extent Error
ext4fs_devread read outside partition 4294967294
part_offset is 26624
total_sector is 131072
error: overflow occurs
Extent Error
part_offset is 26624
total_sector is 131072
error: overflow occurs
Extent Error
part_offset is 26624
total_sector is 131072
error: overflow occurs
Extent Error
part_offset is 26624
total_sector is 131072
error: overflow occurs
Extent Error
part_offset is 26624
total_sector is 131072
error: overflow occurs
Extent Error
part_offset is 26624
total_sector is 131072
error: overflow occurs
Extent Error
part_offset is 26624
total_sector is 131072
error: overflow occurs
Extent Error
part_offset is 26624
total_sector is 131072
error: overflow occurs
Extent Error
part_offset is 26624
total_sector is 131072
error: overflow occurs
Extent Error
part_offset is 26624
total_sector is 131072
error: overflow occurs
Extent Error
Extent Error
ext4fs_devread read outside partition 4294967294
part_offset is 26624
total_sector is 131072
error: overflow occurs
update journal finished
Extent Error
ext4fs_devread read outside partition 4294967294
part_offset is 26624
total_sector is 131072
error: overflow occurs

DFU complete CRC32: 0x3875a108
DOWNLOAD ... OK
Ctrl+C to exit ...
resetting ...

The old file is still present on the partition.


After reverting following patch:

Commit: "ext4fs: Add ext4 extent cache for read operations"
SHA1: fc0fc50f38a4d7d0554558076a79dfe8b0d78cd5

I am able to store files on the ext4 partition:

USB PHY0 Enable
#File System is consistent
file found deleting
update journal finished
File System is consistent
update journal finished

DFU complete CRC32: 0x3875a108
DOWNLOAD ... OK
Ctrl+C to exit ...
resetting ...


Any ideas how to fix it?

--
Best regards,

Lukasz Majewski

Samsung R&D Institute Poland (SRPOL) | Linux Platform Group
_______________________________________________
U-Boot mailing list
[hidden email]
http://lists.denx.de/mailman/listinfo/u-boot
Reply | Threaded
Open this post in threaded view
|

Re: [Regression][ext4] Regression at EXT4 filesystem code

Ionut Nicu

Hi,

On 24.02.2014 16:13, ext Lukasz Majewski wrote:

> Dear All,
>
> In the newest master u-boot
> (SHA1: 1674df60d17e0e72396c961d5390bb62b184ad95)
>
> I've found a regression with writing to EXT4 file system:
>
> Writing uImage to ext4 partition - created with Debian's
> mkfs.ext4 /dev/sdd2 (size 58M)
>
> dfu-util -R -a2 -D arch/arm/boot/uImage
> uImage size - 4.8 MiB
>
> GADGET DRIVER: usb_dnl_dfu
> USB PHY0 Enable
> #Extent Error
> ext4fs_devread read outside partition 4294967294
> Extent Error
>

Ugh ... It's pretty stupid, but my patch is completely ignoring the write path.
The error occurs because in read_allocated_block() I'm trying to use the extent
cache, but this extent cache is only built in ext4fs_read_file().

>
> After reverting following patch:
>
> Commit: "ext4fs: Add ext4 extent cache for read operations"
> SHA1: fc0fc50f38a4d7d0554558076a79dfe8b0d78cd5
>
> I am able to store files on the ext4 partition:
>
> USB PHY0 Enable
> #File System is consistent
> file found deleting
> update journal finished
> File System is consistent
> update journal finished
>
> DFU complete CRC32: 0x3875a108
> DOWNLOAD ... OK
> Ctrl+C to exit ...
> resetting ...
>
>
> Any ideas how to fix it?
>

I guess the best solution for the moment is to revert the patch and I'll try to
come up with an updated version which doesn't break the write functionality.

Sorry about this.

Regards,
Ionut
_______________________________________________
U-Boot mailing list
[hidden email]
http://lists.denx.de/mailman/listinfo/u-boot
Reply | Threaded
Open this post in threaded view
|

Re: [Regression][ext4] Regression at EXT4 filesystem code

Tom Rini
On Mon, Feb 24, 2014 at 05:42:59PM +0100, Ionut Nicu wrote:

>
> Hi,
>
> On 24.02.2014 16:13, ext Lukasz Majewski wrote:
> > Dear All,
> >
> > In the newest master u-boot
> > (SHA1: 1674df60d17e0e72396c961d5390bb62b184ad95)
> >
> > I've found a regression with writing to EXT4 file system:
> >
> > Writing uImage to ext4 partition - created with Debian's
> > mkfs.ext4 /dev/sdd2 (size 58M)
> >
> > dfu-util -R -a2 -D arch/arm/boot/uImage
> > uImage size - 4.8 MiB
> >
> > GADGET DRIVER: usb_dnl_dfu
> > USB PHY0 Enable
> > #Extent Error
> > ext4fs_devread read outside partition 4294967294
> > Extent Error
> >
>
> Ugh ... It's pretty stupid, but my patch is completely ignoring the write path.
> The error occurs because in read_allocated_block() I'm trying to use the extent
> cache, but this extent cache is only built in ext4fs_read_file().
>
> >
> > After reverting following patch:
> >
> > Commit: "ext4fs: Add ext4 extent cache for read operations"
> > SHA1: fc0fc50f38a4d7d0554558076a79dfe8b0d78cd5
> >
> > I am able to store files on the ext4 partition:
> >
> > USB PHY0 Enable
> > #File System is consistent
> > file found deleting
> > update journal finished
> > File System is consistent
> > update journal finished
> >
> > DFU complete CRC32: 0x3875a108
> > DOWNLOAD ... OK
> > Ctrl+C to exit ...
> > resetting ...
> >
> >
> > Any ideas how to fix it?
> >
>
> I guess the best solution for the moment is to revert the patch and I'll try to
> come up with an updated version which doesn't break the write functionality.
>
> Sorry about this.
Reverted for now, thanks!

--
Tom

_______________________________________________
U-Boot mailing list
[hidden email]
http://lists.denx.de/mailman/listinfo/u-boot

signature.asc (853 bytes) Download Attachment