Приложение A. Хедър файлове

Съдържание

A.1. configure.in
A.2. Makefile.am
A.3. Makefile.mig
A.4. libe3pager/Makefile.am
A.5. libe3pager/e3pager.h
A.6. libe3pager/priv.h
A.7. libscache/Makefile.am
A.8. libscache/scache.h
A.9. libjstore/Makefile.am
A.10. libjstore/jstore.h
A.11. libjstore/priv.h
A.12. libe3diskfs/Makefile.am
A.13. libe3diskfs/e3diskfs.h
A.14. libe3diskfs/diskfs-pager.h
A.15. ext3fs/Makefile.am
A.16. ext3fs/ext3fs.h
A.17. ext3fs/ext3_fs.h
A.18. ext3fs/ext3_fs_i.h
dnl Process this file with autoconf to produce a configure script.

AC_INIT(ext3fs, 0.1)
AM_INIT_AUTOMAKE

dnl Create a config.h file (-DHAVE_CONFIG_H will be added to DEFS).
dnl AC_CONFIG_HEADERS replaces the obsolete AM_CONFIG_HEADER, which
dnl newer Automake releases no longer provide.
AC_CONFIG_HEADERS([config.h])

AC_SUBST(VERSION)

ISODATE=`date +%Y-%m-%d`
AC_SUBST(ISODATE)

AC_CANONICAL_HOST

dnl Preprocessor and compiler flags substituted into every Makefile.
AM_CPPFLAGS="-D_GNU_SOURCE=1 -D_FILE_OFFSET_BITS=64"
AC_SUBST(AM_CPPFLAGS)

AM_CFLAGS="-std=gnu99 -Wall"
AC_SUBST(AM_CFLAGS)

dnl Checks for programs.
AC_PROG_INSTALL
AC_PROG_CC
AC_PROG_LIBTOOL

dnl MiG generates the RPC stubs (see Makefile.mig).  Without it the
dnl stub rules would run an empty command name, so stop here with a
dnl clear message instead.
AC_CHECK_TOOL(MIG, mig)
if test -z "$MIG"; then
  AC_MSG_ERROR([mig (the Mach Interface Generator) was not found])
fi

dnl Checks for libraries.

dnl Checks for header files.
AC_HEADER_STDC

dnl Checks for library functions.

dnl Files to generate.  Passing the list to AC_OUTPUT is obsolete;
dnl AC_CONFIG_FILES followed by a bare AC_OUTPUT is the supported form.
AC_CONFIG_FILES([Makefile
	libe3pager/Makefile libscache/Makefile libjstore/Makefile
	libe3diskfs/Makefile ext3fs/Makefile])
AC_OUTPUT
# Top-level Makefile.am.
# Require Automake 1.8 or newer.
AUTOMAKE_OPTIONS = 1.8

# Subdirectories to build, in the order listed.
SUBDIRS = libe3pager libscache libjstore libe3diskfs ext3fs
# How to build RPC stubs in -*- mode: makefile -*-
# Taken from Makeconf in the Hurd.

#$(MIG) must be set by the calling Makefile.am this way:
#MIG = @MIG@
MIG_CPPFLAGS = -x c # XXX: Is this the right way?
MIGCOM = $(MIG) -cc cat - /dev/null
MIGSFLAGS = -DSEQNOS

# We always need this setting, because libc does not include the bogus names.
MIGCOMFLAGS := -subrprefix __

# User settable variables:
#	mig-sheader-prefix prepend to foo_S.h for name of foo.defs stub header
# 	MIGSFLAGS	   flags to CPP when building server stubs and headers
#	foo_MIGSFLAGS	   same, but only for interface `foo'
# 	MIGCOMSFLAGS	   flags to MiG when building server stubs and headers
#	foo_MIGCOMSFLAGS   same, but only for interface `foo'
# 	MIGUFLAGS	   flags to CPP when building user stubs and headers
#	foo_MIGUFLAGS	   same, but only for interface `foo'
# 	MIGCOMUFLAGS	   flags to MiG when building user stubs and headers
#	foo_MIGCOMUFLAGS   same, but only for interface `foo'
#	CPPFLAGS	   flags to CPP

# Implicit rules for building server and user stubs from mig .defs files.

# These chained rules could be (and used to be) single rules using pipes.
# But it's convenient to be able to explicitly make the intermediate
# files when you want to deal with a problem in the MiG stub generator.

# Server side: feed the preprocessed foo.sdefsi to MiG, producing the
# server header $(mig-sheader-prefix)foo_S.h and the server stub
# fooServer.c.  The user-side outputs are sent to /dev/null.
$(mig-sheader-prefix)%_S.h %Server.c: %.sdefsi
	$(MIGCOM) $(MIGCOMFLAGS) $(MIGCOMSFLAGS) $($*_MIGCOMSFLAGS) \
		    -sheader $(mig-sheader-prefix)$*_S.h -server $*Server.c \
		    -user /dev/null -header /dev/null < $<

# Preprocess foo.defs for the server side; SERVERPREFIX is defined as S_.
%.sdefsi: %.defs
	$(CPP) $(MIG_CPPFLAGS) $(MIGSFLAGS) $($*_MIGSFLAGS) -DSERVERPREFIX=S_ $< -o $@
# Preprocess foo.defs for the user side.
%.udefsi: %.defs
	$(CPP) $(MIG_CPPFLAGS) $(MIGUFLAGS) $($*_MIGUFLAGS) $< -o $@
# User side: feed the preprocessed foo.udefsi to MiG, producing the user
# header foo_U.h and the user stub fooUser.c.  The server stub is sent
# to /dev/null.
%_U.h %User.c: %.udefsi
	$(MIGCOM) $(MIGCOMFLAGS) $(MIGCOMUFLAGS) $($*_MIGCOMUFLAGS) < $< \
		  -user $*User.c -server /dev/null -header $*_U.h

# Where to find .defs files.
# NOTE(review): these are absolute paths; they assume the headers are
# installed under /include -- confirm for other installation prefixes.
vpath %.defs /include/mach /include/hurd

# The multi-threaded pager library.
lib_LTLIBRARIES = libe3pager.la

libe3pager_la_SOURCES = \
	data-request.c data-return.c data-unlock.c pager-port.c \
	inhibit-term.c lock-completed.c lock-object.c mark-error.c \
	no-senders.c object-init.c object-terminate.c pagemap.c \
	pager-create.c pager-flush.c pager-shutdown.c pager-sync.c \
	stubs.c seqnos.c demuxer.c chg-compl.c pager-attr.c clean.c \
	dropweak.c notify-stubs.c get-upi.c pager-memcpy.c pager-return.c \
	offer-page.c \
	priv.h
include_HEADERS = e3pager.h
libe3pager_la_LDFLAGS = -version-info 0:0:0

# The following files are generated from /include/mach/*.defs
nodist_libe3pager_la_SOURCES = memory_objectServer.c notifyServer.c
BUILT_SOURCES = memory_object_S.h notify_S.h

# Automake removes generated files on `make clean' only when they are
# listed here; this covers the stubs, their headers and the
# preprocessed .defs intermediates produced by Makefile.mig.
CLEANFILES = $(BUILT_SOURCES) $(nodist_libe3pager_la_SOURCES) \
	memory_object.sdefsi notify.sdefsi

# Settings consumed by the stub rules in Makefile.mig.  MIGCOMSFLAGS is
# passed to MiG when building server stubs and headers.
MIG = @MIG@
MIGCOMSFLAGS = -prefix _pager_
include $(top_srcdir)/Makefile.mig
/* Definitions for multi-threaded pager library
   Copyright (C) 1994, 1995, 1996, 1997, 1999 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2, or (at
   your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */


/* For e3pager.h, use the same header symbol as hurd/pager.h so that
   hurd/pager.h doesn't get included.  */
#ifndef _HURD_PAGER_
#define _HURD_PAGER_

#include <hurd/ports.h>

/* This declaration exists to place struct user_pager_info in the proper
   scope.  */
struct user_pager_info;

/* This de-muxer function is for use within libports_demuxer. */
/* INP is a message we've received; OUTP will be filled in with
   a reply message.  */
int pager_demuxer (mach_msg_header_t *inp,
		   mach_msg_header_t *outp);

/* Create a new pager.  The pager will have a port created for it
   (using libports, in BUCKET) and will be immediately ready to
   receive requests.  U_PAGER will be provided to later calls to
   pager_find_address.  The pager will have one user reference
   created.  MAY_CACHE and COPY_STRATEGY are the original values of
   those attributes as for memory_object_ready.  If NOTIFY_ON_EVICT is
   non-zero, pager_notify_evict user callback will be called when page
   is evicted.  Users may create references to pagers by use of the
   relevant ports library functions.  On errors, return null and set
   errno.  */
struct pager *
pager_create (struct user_pager_info *u_pager,
	      struct port_bucket *bucket,
	      boolean_t may_cache,
	      memory_object_copy_strategy_t copy_strategy,
	      boolean_t notify_on_evict);

/* Return the user_pager_info struct associated with a pager. */
struct user_pager_info *
pager_get_upi (struct pager *p);

/* Sync data from pager PAGER to backing store; wait for
   all the writes to complete iff WAIT is set. */
void
pager_sync (struct pager *pager,
	    int wait);

/* Sync some data (starting at START, for LEN bytes) from pager PAGER
   to backing store.  Wait for all the writes to complete iff WAIT is
   set.  */
void
pager_sync_some (struct pager *pager,
		 vm_address_t start,
		 vm_size_t len,
		 int wait);

/* Flush data from the kernel for pager PAGER and force any pending
   delayed copies.  Wait for all pages to be flushed iff WAIT is set. */
void
pager_flush (struct pager *pager,
	     int wait);


/* Flush some data (starting at START, for LEN bytes) for pager PAGER
   from the kernel.  Wait for all pages to be flushed iff WAIT is set.  */
void
pager_flush_some (struct pager *pager,
		  vm_address_t start,
		  vm_size_t len,
		  int wait);

/* Flush data from the kernel for pager PAGER and force any pending
   delayed copies.  Wait for all pages to be flushed iff WAIT is set.
   Have the kernel write back modifications.  */
void
pager_return (struct pager *pager,
	      int wait);


/* Flush some data (starting at START, for LEN bytes) for pager PAGER
   from the kernel.  Wait for all pages to be flushed iff WAIT is set.  
   Have the kernel write back modifications. */
void
pager_return_some (struct pager *pager,
		   vm_address_t start,
		   vm_size_t len,
		   int wait);

/* Offer a page of data to the kernel.  If PRECIOUS is set, then this
   page will be paged out at some future point, otherwise it might be
   dropped by the kernel.  If the page is currently in core, the
   kernel might ignore this call.  */
void
pager_offer_page (struct pager *pager,
		  int precious,
		  int writelock,
		  vm_offset_t page,
		  vm_address_t buf);  

/* Change the attributes of the memory object underlying pager PAGER.
   Args MAY_CACHE and COPY_STRATEGY are as for
   memory_object_change_attributes.  Wait for the kernel to report completion
   iff WAIT is set.  */
void
pager_change_attributes (struct pager *pager,
			 boolean_t may_cache,
			 memory_object_copy_strategy_t copy_strategy,
			 int wait);

/* Return the port (receive right) for requests to the pager.  It is
   absolutely necessary that a new send right be created from this
   receive right.  */
mach_port_t
pager_get_port (struct pager *pager);

/* Force termination of a pager.  After this returns, no
   more paging requests on the pager will be honored, and the
   pager will be deallocated.  (The actual deallocation might
   occur asynchronously if there are currently outstanding paging
   requests that will complete first.)  */
void
pager_shutdown (struct pager *pager);

/* Return the error code of the last page error for pager P at address ADDR;
   this will be deleted when the kernel interface is fixed.  */
error_t
pager_get_error (struct pager *p, vm_address_t addr);

/* Try to copy *SIZE bytes between the region OTHER points to
   and the region at OFFSET in the pager indicated by PAGER and MEMOBJ.
   If PROT is VM_PROT_READ, copying is from the pager to OTHER;
   if PROT contains VM_PROT_WRITE, copying is from OTHER into the pager.
   *SIZE is always filled in the actual number of bytes successfully copied.
   Returns an error code if the pager-backed memory faults;
   if there is no fault, returns 0 and *SIZE will be unchanged.  */
error_t
pager_memcpy (struct pager *pager, memory_object_t memobj,
	      vm_offset_t offset, void *other, size_t *size,
	      vm_prot_t prot);

/* The user must define this function.  For pager PAGER, read one page
   from offset PAGE.  Set *BUF to be the address of the page, and set
   *WRITE_LOCK if the page must be provided read-only.  The only
   permissible error returns are EIO, EDQUOT, and ENOSPC. */
error_t
pager_read_page (struct user_pager_info *pager,
		 vm_offset_t page,
		 vm_address_t *buf,
		 int *write_lock);

/* The user must define this function.  For pager PAGER, synchronously
   write one page from BUF to offset PAGE.  In addition, mfree
   (or equivalent) BUF.  The only permissible error returns are EIO,
   EDQUOT, and ENOSPC. */
error_t
pager_write_page (struct user_pager_info *pager,
		  vm_offset_t page,
		  vm_address_t buf);

/* The user must define this function.  A page should be made writable. */
error_t
pager_unlock_page (struct user_pager_info *pager,
		   vm_offset_t address);

/* The user must define this function.  It is used when you want to be
   able to change the association of pages to backing store.  To use
   it, pass a non-zero value in NOTIFY_ON_EVICT when the pager is
   created.  You can change the association of a page only when
   pager_notify_evict has been called and you haven't touched the page
   content after that.  */
void
pager_notify_evict (struct user_pager_info *pager,
		    vm_offset_t page);

/* The user must define this function.  It should report back (in
   *OFFSET and *SIZE) the minimum valid address the pager will accept
   and the size of the object.  */
error_t
pager_report_extent (struct user_pager_info *pager,
		     vm_address_t *offset,
		     vm_size_t *size);

/* The user must define this function.  This is called when a pager is
   being deallocated after all extant send rights have been destroyed.  */
void
pager_clear_user_data (struct user_pager_info *pager);

/* The user must define this function.  This will be called when the ports
   library wants to drop weak references.  The pager library creates no
   weak references itself.  If the user doesn't either, then it's OK for
   this function to do nothing.  */
void
pager_dropweak (struct user_pager_info *p);

#endif
/* Private data for pager library.
   Copyright (C) 1994,95,96,97,99, 2000 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2, or (at
   your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */

#include <mach.h>
#include <hurd.h>
#include <sys/mman.h>
#include "e3pager.h"
#include <hurd/ports.h>

/* Define this if you think the kernel is sending memory_object_init
   out of sequence with memory_object_terminate. */
/* #undef KERNEL_INIT_RACE */

/* Private per-pager state of the pager library.  */
struct pager
{
  /* NOTE(review): presumably kept first so that a struct pager can be
     handled by libports as a port object -- confirm.  */
  struct port_info port;
  /* The user_pager_info associated with this pager (see pager_get_upi
     in e3pager.h).  */
  struct user_pager_info *upi;

  enum
    {
      NOTINIT,			/* before memory_object_init */
      NORMAL,			/* while running */
      SHUTDOWN,			/* ignore all further requests */
    } pager_state;

  struct mutex interlock;
  struct condition wakeup;

  struct lock_request *lock_requests; /* pending lock requests */
  struct attribute_request *attribute_requests; /* pending attr requests */

  /* These carry the same names as the corresponding pager_create
     arguments.  */
  boolean_t may_cache;
  memory_object_copy_strategy_t copy_strategy;
  boolean_t notify_on_evict;

  /* Interface ports */
  memory_object_control_t memobjcntl;
  memory_object_name_t memobjname;

  /* NOTE(review): presumably the sequence number used by
     _pager_wait_for_seqno / _pager_release_seqno -- confirm.  */
  int seqno;

  int noterm;			/* number of threads blocking termination */

  struct pager *next, **pprev;

  /* NOTE(review): these are plain `int' bit-fields of width 1.  Where
     such bit-fields are signed they hold 0 or -1, so they should be
     tested for non-zero rather than compared with 1.  */
  int termwaiting:1;
  int waitingforseqno:1;

#ifdef KERNEL_INIT_RACE
  /* Out of sequence object_init calls waiting for
     terminations. */
  struct pending_init *init_head, *init_tail;
#endif

  /* One entry per page; see "Pagemap format" in this file for the
     meaning of the bits.  */
  short *pagemap;
  int pagemapsize;		/* number of elements in PAGEMAP */
};

/* A pending lock request; struct pager refers to these through its
   LOCK_REQUESTS member.  */
struct lock_request
{
  /* List linkage.  NOTE(review): PREVP presumably points at the
     pointer that refers to this element -- confirm.  */
  struct lock_request *next, **prevp;
  /* Address range of the request.  NOTE(review): whether END is
     inclusive cannot be told from this header -- confirm.  */
  vm_address_t start, end;
  int pending_writes;
  int locks_pending;
  int threads_waiting;
};

/* A pending attribute change request; struct pager refers to these
   through its ATTRIBUTE_REQUESTS member.  */
struct attribute_request
{
  /* List linkage.  NOTE(review): PREVP presumably points at the
     pointer that refers to this element -- confirm.  */
  struct attribute_request *next, **prevp;
  /* The requested attribute values (same names as the
     pager_change_attributes arguments).  */
  boolean_t may_cache;
  memory_object_copy_strategy_t copy_strategy;
  int threads_waiting;
  int attrs_pending;
};

#ifdef KERNEL_INIT_RACE
struct pending_init
{
  mach_port_t control;
  mach_port_t name;
  struct pending_init *next;
};
#endif

enum page_errors
{
  PAGE_NOERR,
  PAGE_ENOSPC,
  PAGE_EIO,
  PAGE_EDQUOT,
};

extern int _pager_page_errors[];

/* Pagemap format */
/* Layout of one pagemap entry, as implied by the masks below:
     bits 4-11  binary state bits (PM_INVALID ... PM_FORCEREAD)
     bits 2-3   PM_ERROR, an enum page_errors value
     bits 0-1   PM_NEXTERROR, an enum page_errors value  */
/* These are binary state bits */
#define PM_FORCEREAD		0x0800 /* force pager_read_page */
#define PM_NOTIFY_PAGEOUT	0x0400 /* notify on page out */
#define PM_WRITEWAIT  0x0200	/* queue wakeup once write is done */
#define PM_INIT       0x0100    /* data has been written */
#define PM_INCORE     0x0080	/* kernel might have a copy */
#define PM_PAGINGOUT  0x0040	/* being written to disk */
#define PM_PAGEINWAIT 0x0020	/* provide data back when write done */
#define PM_INVALID    0x0010	/* data on disk is irrevocably wrong */

/* These take values of enum page_errors */

/* Doesn't belong here; this is the error that should have been passed
   through m_o_data_error to the user but isn't; this lets internal use
   of the pager know what the error is.  */
/* PM_ERROR extracts bits 2-3.  SET_PM_ERROR does not modify BYTE; it
   yields the new entry value, which the caller must store.  ERR is not
   masked, so it must fit in two bits.  */
#define PM_ERROR(byte) (((byte) & 0xc) >> 2)
#define SET_PM_ERROR(byte,err) (((byte) & ~0xc) | ((err) << 2))

/* Issue this error on next data_request, but only if it asks for
   write access.  */
/* PM_NEXTERROR extracts bits 0-1.  SET_PM_NEXTERROR likewise yields
   the new entry value and does not mask ERR.  */
#define PM_NEXTERROR(byte) ((byte) & 0x3)
#define SET_PM_NEXTERROR(byte,err) (((byte) & ~0x3) | (err))

/* NOTE(review): presumably the libports class used for pager ports --
   confirm.  This is a definition rather than an `extern' declaration,
   so every file including this header gets its own tentative
   definition and the link relies on common-symbol merging (it breaks
   with -fno-common).  Consider declaring it `extern' here and defining
   it in exactly one .c file.  */
struct port_class *_pager_class;


void _pager_wait_for_seqno (struct pager *, int);
void _pager_release_seqno (struct pager *, int);
void _pager_block_termination (struct pager *);
void _pager_allow_termination (struct pager *);
error_t _pager_pagemap_resize (struct pager *, vm_address_t);
void _pager_mark_next_request_error (struct pager *, vm_address_t,
				     vm_size_t, error_t);
void _pager_mark_object_error (struct pager *, vm_address_t,
			       vm_size_t, error_t);
void _pager_lock_object (struct pager *, vm_offset_t, vm_size_t, int, int,
			 vm_prot_t, int);
void _pager_free_structure (struct pager *);
void _pager_clean (void *arg);
void _pager_real_dropweak (void *arg);
# The Library
lib_LTLIBRARIES = libscache.la

# Library version
libscache_la_LDFLAGS = -version-info 0:0:0

# Source files
libscache_la_SOURCES = close.c get.c bufset.c \
	open.c pager.c pager-ops.c put.c shadow.c
include_HEADERS = scache.h

# Both library and tests use libe3pager.
AM_CPPFLAGS += -I$(top_srcdir)/libe3pager

# Test suite
# `make check' builds the check_PROGRAMS and then runs every program
# listed in TESTS.
TESTS = test-open test-bufset test-copy test-shadow
check_PROGRAMS = $(TESTS)

# Library used in tests
# (these sources are compiled into each test program below)
LIBTEST = libtest.h libtest.c

# The tests
# Every test program is linked with -static.
test_open_SOURCES = test-open.c $(LIBTEST)
test_open_LDFLAGS = -static
test_bufset_SOURCES = test-bufset.c $(LIBTEST)
test_bufset_LDFLAGS = -static
test_shadow_SOURCES = test-shadow.c $(LIBTEST)
test_shadow_LDFLAGS = -static
test_copy_SOURCES = test-copy.c $(LIBTEST)
test_copy_LDFLAGS = -static

# Tests use the following libraries.
# Doesn't affect building the library.
LDADD = libscache.la \
	../libe3pager/libe3pager.la \
	-lstore -lthreads -lports -lihash -lshouldbeinlibc
/* Store cache
   Copyright (C) 2003, 2004 Ognyan Kulev
   Copyright (C) 1996, 1997 Free Software Foundation, Inc.
   Written by Roland McGrath.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2, or (at
   your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */

#ifndef _SCACHE_H
#define _SCACHE_H

#include <assert.h>
#include <setjmp.h>
#include <stdint.h>
#include <stdlib.h>
#include <cthreads.h>
#include <e3pager.h>
#include <hurd/ihash.h>
#include <hurd/ports.h>
#include <hurd/store.h>

/**
 * A store cache, created by scache_open and destroyed by
 * scache_close.
 */
struct scache
{
  /** The backing store (see scache_open).  */
  struct store *store;

  /** NOTE(review): presumably the BLOCK_SIZE given to scache_open and
      its base-2 logarithm -- confirm.  */
  size_t block_size;
  unsigned log2_block_size;

  struct pager *pager;
  /** The user_pager_info used by this scache (see
      scache_get_pager_info).  */
  struct scache_pager_info *pager_info;
  /** Memory whose access may fault; scache_catch_exception is the way
      to catch such faults.  NOTE(review): presumably a mapping backed
      by PAGER -- confirm.  */
  void *image;
  
  /** Lock for changeable fields.  */
  struct mutex lock;

  /** Fired when any reassociation is complete.  */
  struct condition reassociation;


  /* NOTE(review): the fields below are undocumented in this header.
     HINT presumably relates to scache_open's CACHE_SIZE_HINT, and
     BUFFER/HASH to the set of buffers -- confirm against the
     implementation.  */
  size_t size;
  size_t blocks;
  struct scache_buffer *buffer;
  size_t hint;
  hurd_ihash_t hash;
};

/** Number of a block in a store cache.  */
typedef store_offset_t scache_block_t;

/** Sentinel block number; scache_copy accepts it as TARGET_BLOCK to
    mean "the block number of BUFFER".  */
#define SCACHE_NO_BLOCK	((scache_block_t) -1)

/* SCB_* bits for the FLAGS field of struct scache_buffer.
   NOTE(review): the individual meanings are not documented in this
   header.  SCB_SHADOW presumably marks shadow buffers (see
   scache_copy), and SCB_UNREAD must be cleared whenever a buffer's
   REF_COUNT is incremented (see struct scache_buffer).  */
#define SCB_VALID		(1 << 0)
#define SCB_INCORE		(1 << 1)
#define SCB_UNREAD		(1 << 2)
#define SCB_DONT_READ		(1 << 3)
#define SCB_SHADOW		(1 << 4)

/* Mask of SCB_INCORE, SCB_SHADOW and SCB_UNREAD.  NOTE(review):
   presumably a buffer with any of these set is not picked for reuse --
   confirm.  */
#define SCB_DONT_REUSE	(SCB_INCORE | SCB_SHADOW | SCB_UNREAD)

/* XORed with last_read_block to form last_read_block_xor in struct
   scache_buffer.  */
#define SCACHE_BLOCK_XOR	0xDeadBeef

/**
 * Buffer for one block of scache.
 */
struct scache_buffer
{
  /** The scache to which this buffer belongs.  */
  struct scache *scache;

  /** The number of the block that this buffer contains.  */
  scache_block_t block;

  /** The last read block in this buffer.  */
  scache_block_t last_read_block;

  /** Checksum for last_read_block.  This is XOR between
      last_read_block and SCACHE_BLOCK_XOR.  */
  scache_block_t last_read_block_xor;

  /** The data of the buffer.  This field can be used by clients.  */
  void *data;

  /** Lock for FLAGS, REF_COUNT and BUFSETS.

      Fields SCACHE, BLOCK and DATA are not expected to ever be
      changed while there is some reference to buffer.  */
  struct mutex lock;

  /** SCB_* flags of this buffer.  */
  unsigned flags;

  /** Number of references to this buffer.  When dropped to 0, the
      buffer can be discarded.  When it's incremented, SCB_UNREAD
      must be cleared.  */
  unsigned ref_count;

  /** Buffer sets that contain this buffer.  Key is pointer to struct
      scache_bufset, value is always NULL.  The primary purpose of
      this field is to remove buffer from all buffer sets that contain
      it when buffer is discarded.  */
  hurd_ihash_t bufsets;
};

/** Set of buffers.  One buffer can be in many bufsets.  */
struct scache_bufset
{
  /** The scache of the buffers.  */
  struct scache *scache;

  /** Lock for data structures that hold the set.  */
  struct mutex lock;

  /** The set itself represented as associations of block number with
      buffer.  */
  hurd_ihash_t buffers;
};

/**
 * Open scache from given STORE.
 *
 * @param store The backing store.  libscache takes ownership of it.
 * Even if there is an error, STORE is consumed.
 * @param block_size Size of the blocks that will be requested.
 * @param cache_size_hint Client hint for the size of the cache.  May
 * be ignored by implementation.
 * @param bucket Bucket for the port of the used pager.  Can be NULL,
 * in which case libscache will use internal bucket.
 * @return The new scache.
 */
struct scache *
scache_open (struct store *store, size_t block_size,
	     size_t cache_size_hint, struct port_bucket *bucket);

/**
 * Close scache.
 *
 * All used references are released.
 *
 * @param scache The store cache being closed.
 */
void
scache_close (struct scache *scache);

/**
 * Get the user_pager_info used by scache.
 *
 * XXX Why does the client need this?
 *
 * @param scache The store cache.
 * @return user_pager_info used by this scache.
 */
struct scache_pager_info *
scache_get_pager_info (struct scache *scache);

/* Internal: get locked new buffer for reuse.  */
struct scache_buffer *
_scache_buffer_new (struct scache *scache);

/**
 * Get buffer for given block.
 *
 * Reference will be added to the buffer.
 *
 * This function never returns shadow buffer.  So if there are shadow
 * buffers and there is no real buffer for block BLOCK in store cache
 * SCACHE, NULL will be returned.
 *
 * @param scache The store cache.
 * @param block Number of block that is requested.
 * @return Buffer for requested block.
 */
struct scache_buffer *
scache_get (struct scache *scache,
	    scache_block_t block);

/**
 * Get zeroed buffer for given block.  XXX: scache_get_zeroed?
 *
 * Using this function instead of scache_get will skip reading this
 * block from store.  Instead, it will be filled with zeroes.
 *
 * Reference will be added to the buffer, although if you use this
 * function, you don't want this buffer to be already referenced.
 *
 * @param scache The store cache.
 * @param block Number of block that is requested.
 * @return Buffer for requested block.
 */
struct scache_buffer *
scache_get_new (struct scache *scache,
		scache_block_t block);

/**
 * Increment reference count of buffer.
 *
 * This function is convenient when you pass this buffer to some
 * function where reference will be consumed and you don't want to
 * lose the buffer.
 *
 * @param scache The store cache of buffer.
 * @param buffer Buffer whose reference count is to be incremented.
 */
void
scache_ref (struct scache *scache, struct scache_buffer *buffer);

/**
 * Return back buffer to scache, decrementing buffer's reference
 * count.
 *
 * @param scache The store cache.
 * @param buffer The buffer that is returned to scache.
 */
void
scache_put (struct scache *scache,
	    struct scache_buffer *buffer);

/** Make new buffer as copy of existing buffer.
 *
 * Returns buffer for block TARGET_BLOCK in store cache TARGET_SCACHE,
 * whose content is copied from buffer BUFFER of store cache SCACHE.
 * Reference count is incremented.
 *
 * Store caches TARGET_SCACHE and SCACHE can be different, as can be
 * block number of BUFFER and block TARGET_BLOCK.
 *
 * If WANT_SHADOW is non-zero, the returned buffer will be shadow and
 * it will always be created and its content copied from buffer
 * BUFFER, regardless if there are other shadow or real buffers for
 * the same block TARGET_BLOCK in store cache TARGET_SCACHE.  See
 * scache_set_shadow for more information about shadow buffers.
 *
 * If WANT_SHADOW is zero and there is real buffer for block TARGET_BLOCK in
 * store cache TARGET_SCACHE, this buffer is returned as result and
 * its content will be copied from buffer BUFFER.
 *
 * If WANT_SHADOW is zero and there is no real buffer for block
 * TARGET_BLOCK in store cache TARGET_SCACHE, even if there are shadow
 * buffers, a new real buffer for block TARGET_BLOCK in store cache
 * TARGET_SCACHE is created and its content will be copied from buffer
 * BUFFER.
 *
 * If SCACHE and TARGET_SCACHE are different store caches, they must
 * have same block size.
 *
 * @param scache The store cache.
 * @param buffer The buffer to be copied.
 * @param target_scache The store cache of the copy.  If it's NULL,
 * SCACHE argument is used.
 * @param target_block The block number of the copy.  If it's
 * SCACHE_NO_BLOCK, the block number of BUFFER is assumed.
 * @param want_shadow If non-zero, we want shadow buffer, not real
 * buffer.  See function long description for more information.
 * @return New buffer with copied content from BUFFER that represents
 * TARGET_BLOCK of TARGET_SCACHE.
 */
struct scache_buffer *
scache_copy (struct scache *scache,
	     struct scache_buffer *buffer,
	     struct scache *target_scache,
	     scache_block_t target_block,
	     int want_shadow);

error_t
_scache_buffer_deassoc (struct scache *scache, struct scache_buffer *buffer);

/**
 * Create new buffer set.
 *
 * Buffer sets are set of buffers for convenient mass operations like
 * "return" or "find".
 *
 * When a buffer is automatically deleted from the store cache, it also
 * vanishes from all buffer sets that included it.  Note that the only
 * way for a shadow buffer to be deleted is to mark it as
 * self-destructible and drop its reference count to zero.
 *
 * @param scache The store cache of the future buffers in the buffer
 * set.
 * @return New empty buffer set.
 */
struct scache_bufset *
scache_bufset_new (struct scache *scache);

/**
 * Return to backing store all buffer content of buffer set.
 *
 * @param bufset The buffer set.
 * @param wait If not 0, function will return after all buffer
 * contents are returned.
 * @return 0 on success, errno on error.
 */
error_t
scache_bufset_return (struct scache_bufset *bufset, int wait);

/**
 * Flush to backing store all buffer content of buffer set.
 *
 * The changes in the buffers will be lost!
 *
 * @param bufset The buffer set.
 * @param wait If not 0, function will return after all buffer
 * contents are flushed.
 * @return 0 on success, errno on error.
 */
error_t
scache_bufset_flush (struct scache_bufset *bufset, int wait);

/**
 * Free buffer set.
 *
 * @param bufset The buffer set.
 */
void
scache_bufset_delete (struct scache_bufset *bufset);

/**
 * Add buffer to buffer set.
 *
 * @param bufset The buffer set.
 * @param buffer The buffer to be added.
 * @return 0 on success, errno on error.
 */
error_t
scache_bufset_add (struct scache_bufset *bufset,
		   struct scache_buffer *buffer);

/**
 * Remove buffer from buffer set.
 *
 * @param bufset The buffer set.
 * @param buffer The buffer to be removed.
 * @return 0 on success, errno on error.
 */
error_t
scache_bufset_remove (struct scache_bufset *bufset,
		      struct scache_buffer *buffer);

/**
 * Search for specific buffer in buffer set.
 *
 * This function is used to check if buffer is in buffer set.
 *
 * @param bufset The buffer set.
 * @param buffer The buffer to be searched.
 * @return BUFFER if it's found, otherwise NULL.
 */
struct scache_buffer *
scache_bufset_find (struct scache_bufset *bufset,
		    struct scache_buffer *buffer);

/**
 * Iterate through all buffers in buffer set.
 *
 * During iteration, the buffer set is locked, so don't refer to it.
 *
 * XXX: Buffers that are currently being reassociated are skipped.
 *
 * @param bufset The buffer set.
 * @param func Function to run for each buffer.
 * @param arg Additional argument to pass to FUNC.
 * @return Error from FUNC, or zero for success.
 */
error_t
scache_bufset_iterate (struct scache_bufset *bufset,
		       error_t (*func) (struct scache_buffer *, void *),
		       void *arg);
/**
 * Search for block in buffer set.
 *
 * The returned buffer has incremented reference count.
 *
 * This function is used when there are shadows of a block in store
 * cache, in which case it's expected that different shadows are in
 * different buffer sets.  If there are multiple buffers in buffer set
 * BUFSET for block BLOCK and one of them is real buffer, this real
 * buffer is returned.  If all of them are shadows, NULL is returned.
 * If there is only one shadow, it's returned.
 *
 * @param bufset The buffer set.
 * @param block The block in the store cache of the buffer set that is
 * to be searched.
 * @return The found buffer BUFFER, or NULL if it's not found or there
 * are ambiguities.
 */
struct scache_buffer *
scache_bufset_get (struct scache_bufset *bufset,
		   scache_block_t block);

/**
 * Return back buffer to store cache, decrementing buffer's reference
 * count and adding it to buffer set.
 *
 * @param bufset The buffer set where BUFFER is to be added.
 * @param buffer The buffer that is returned to the store cache of
 * BUFSET.
 * @return 0 on success, errno on error.
 */
error_t
scache_bufset_put (struct scache_bufset *bufset,
		   struct scache_buffer *buffer);

/**
 * This magic value should be in scache_pager_info->magic.
 */
#define SCACHE_PAGER_INFO_MAGIC	0x81726354

/**
 * user_pager_info for store cache.
 */
struct scache_pager_info
{
  /**
   * Should be SCACHE_PAGER_INFO_MAGIC.
   */
  uint32_t magic;

  /**
   * The store cache.
   */
  struct scache *scache;
};

/**
 * Internal: Create pager for store cache.
 *
 * @param scache The store cache.
 * @param bucket The bucket where created pager should be put in.  Can
 * be NULL, in which case internal bucket will be created.
 * @return 0 on success, errno on error.
 */
error_t
_scache_new_pager (struct scache *scache,
		   struct port_bucket *bucket);

/* One entry in a per-thread chain of active scache_catch_exception
   scopes.  The macros below keep the most recent entry in the cthread
   data of the current thread and link it to the previous one through
   NEXT.  */
struct _scache_exception_block
  {
    /* The store cache passed to scache_catch_exception.  */
    struct scache *scache;
    /* Context saved by the setjmp call in scache_catch_exception.  */
    jmp_buf env;
    /* The cthread data that was current when this block was
       installed; restored by scache_end_catch_exception.  */
    struct _scache_exception_block *next;
  };

/* Return zero now.  Return a second time with a nonzero error_t
   if this thread faults accessing `scache->image' before calling
   `scache_end_catch_exception' (below).  */
#define scache_catch_exception(scache_arg)				      \
({									      \
    struct _scache_exception_block *sceb = alloca (sizeof *sceb);	      \
    error_t err;							      \
    sceb->scache = (scache_arg);					      \
    sceb->next = (void *) cthread_data (cthread_self ());		      \
    err = setjmp (sceb->env);						      \
    if (err == 0)							      \
      cthread_set_data (cthread_self (), sceb);				      \
    err;								      \
})

/* No longer handle faults on `scache->image' in this thread.
   Any unexpected fault hereafter will crash the program.  */
#define scache_end_catch_exception()					      \
({									      \
    struct _scache_exception_block *sceb;				      \
    sceb  = (void *) cthread_data (cthread_self ());   			      \
    cthread_set_data (cthread_self (), sceb->next);			      \
})

/* The following set of functions are callbacks of libe3pager.  User
   must define the real callbacks, e.g. pager_read_page, and test if
   *((uint32_t *)upi) == SCACHE_PAGER_INFO_MAGIC.  If this is the
   case, the processing of the callback must be passed to the
   scache_pager_ counterpart, e.g. scache_pager_read_page.  */

error_t
scache_pager_read_page (struct scache_pager_info *pager_info,
			vm_offset_t page, vm_address_t *buf,
			int *writelock);

error_t
scache_pager_write_page (struct scache_pager_info *pager_info,
			 vm_offset_t page, vm_address_t buf);

error_t
scache_pager_unlock_page (struct scache_pager_info *pager_info,
			  vm_offset_t address);

void
scache_pager_notify_evict (struct scache_pager_info *pager_info,
			   vm_offset_t page);


error_t
scache_pager_report_extent (struct scache_pager_info *pager_info,
			    vm_address_t *offset, vm_size_t *size);

void
scache_pager_clear_user_data (struct scache_pager_info *pager_info);

void
scache_pager_dropweak (struct scache_pager_info *pager_info);

/* SCACHE_LOCK (O) and SCACHE_UNLOCK (O) lock and unlock the `lock'
   mutex member of the object O points to.

   When SCACHE_DEBUG_LOCK is defined, each operation is first traced to
   stdout with the name of the calling function and the address of O.
   The tracing variants include <stdio.h> themselves, so they do not
   depend on SCACHE_DEBUG_PRINTF (or the includer) having done so.  */
#ifdef SCACHE_DEBUG_LOCK
#include <stdio.h>		/* For printf.  */
#define SCACHE_LOCK(o) \
  do { \
    printf ("%s:   LOCK %p\n", __FUNCTION__, (void *) (o)); \
    mutex_lock (&(o)->lock); \
  } while (0)
#define SCACHE_UNLOCK(o) \
  do { \
    printf ("%s: UNLOCK %p\n",  __FUNCTION__, (void *) (o)); \
    mutex_unlock (&(o)->lock); \
  } while (0)
#else
#define SCACHE_LOCK(o)		mutex_lock (&(o)->lock)
#define SCACHE_UNLOCK(o)	mutex_unlock (&(o)->lock)
#endif

/* Debug tracing.  When SCACHE_DEBUG_PRINTF is defined, SCACHE_PRINTF
   prints the calling function's name, a colon and a space, followed by
   the printf-style message FMT with its arguments ARGS, to stdout.
   FMT must be a string literal because it is concatenated with the
   prefix.  Otherwise the macro expands to a no-op and its arguments are
   not evaluated.

   `## args' drops the preceding comma when no arguments follow FMT, so
   that SCACHE_PRINTF ("text\n") compiles.  */
#ifdef SCACHE_DEBUG_PRINTF
#include <stdio.h>
#define SCACHE_PRINTF(fmt, args...) \
  printf ("%s: " fmt, __FUNCTION__ , ## args)
#else
#define SCACHE_PRINTF(fmt, args...) (void)0
#endif

#endif /* _SCACHE_H */
# libjstore: the journalled store, built as a libtool library.
lib_LTLIBRARIES = libjstore.la
libjstore_la_LDFLAGS = -version-info 0:0:0

# Public header installed together with the library.
include_HEADERS = jstore.h

# Library sources, followed by the headers that are not installed.
libjstore_la_SOURCES = \
	change-readonly.c \
	checkpoint.c \
	close.c \
	commit.c \
	get.c \
	jbuffer.c \
	jstore.c \
	open.c \
	panic.c \
	put.c \
	recover.c \
	revoke.c \
	transaction.c \
	update.c \
	priv.h \
	jbd.h

# Headers of libscache and libe3pager are searched in the source tree.
libjstore_la_CPPFLAGS = $(AM_CPPFLAGS) \
	-I$(top_srcdir)/libscache \
	-I$(top_srcdir)/libe3pager \
	-DWAIT_DEBUG=1
/* Journalled store
   Copyright (C) 2003, 2004 Ognyan Kulev

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
   USA
*/

#ifndef _JSTORE_H
#define _JSTORE_H

#include <stdint.h>
#include <error.h>
#include <scache.h>

/* Error handling.

On any fatal error, the journal should be aborted.  All user functions
except jstore_close (and possibly others) just return the error.  The
error is marked in the journal.
*/

/* Both structures are opaque to users of this header.  */
struct jstore;
struct jstore_update;

/* Flags for jstore_open; they occupy distinct bits and may be or'ed.  */
#define JSTORE_OPEN_RECOVER	(1 << 0) /* Recover data store if needed.  */
#define JSTORE_OPEN_READONLY	(1 << 1) /* Presumably: open read-only
					    (see jstore_change_readonly)
					    -- confirm.  */

/* Opening and closing journal.  */

/**
 * Open journalled store.
 *
 * When FLAGS has JSTORE_OPEN_RECOVER, recover data store using
 * journal store, if needed.
 *
 * @param journal_scache Journal store cache.
 * @param data_scache Data store cache.
 * @param flags Flags for opening (JSTORE_OPEN_*).
 * @return New journalled store ready for updates.
 * NOTE(review): the return value on failure is not documented here;
 * presumably a null pointer -- confirm against open.c.
 */
struct jstore *
jstore_open (struct scache *journal_scache,
	     struct scache *data_scache, int flags);

/**
 * Change readonly state of journal.
 *
 * When writable jstore turns into readonly, it won't be candidate for
 * recovery if journal is not properly closed.  When jstore is
 * readonly, only clean buffers can be returned with jstore_put.
 *
 * @param jstore Journalled store whose readonly state is changed.
 * @param readonly Requested state; presumably nonzero for readonly
 * and zero for writable -- confirm against change-readonly.c.
 * @param update NOTE(review): role is not documented; presumably the
 * update on whose behalf the change is made -- confirm.
 * @return Error code or zero for success.  EALREADY when readonly
 * jstore is tried to be changed to readonly, or writable jstore is
 * tried to be changed to writable.
 */
error_t
jstore_change_readonly (struct jstore *jstore, int readonly,
			struct jstore_update *update);

/**
 * Close journalled store.
 *
 * Finishes all transactions and updates and leaves journal ready for
 * clean opening.
 *
 * @param jstore Journalled store for closing.
 * @return Error code or zero for success.
 */
error_t
jstore_close (struct jstore *jstore);

/**
 * Force commit of current transaction.
 *
 * @param jstore Journalled store whose current transaction is committed.
 * @return Error code or zero for success.
 */
error_t
jstore_commit (struct jstore *jstore);


/* Transactions in JBD are hidden in jstore.  User generally only
   needs to know about "updates".  This structure is known as "handle"
   in Linux JBD.  */

/**
 * Begin single update.
 *
 * @param jstore Journalled store.
 * @param credits Expected maximum number of blocks that will be
 * touched.
 * @return New journalled store update.
 */
struct jstore_update *
jstore_update_begin (struct jstore *jstore, scache_block_t credits);

/**
 * Request more blocks for touching.
 *
 * @param update Journalled store update.
 * @param blocks Wanted number of blocks to touch.
 * @param allow_restart Whether a new transaction should be begun when
 * the current transaction does not have enough credits.
 * @return Error code or zero for success.
 */
error_t
jstore_update_more_credits (struct jstore_update *update,
			    scache_block_t blocks, int allow_restart);

/**
 * Finish journalled store update.
 *
 * @param update Journalled store update.
 * @return Error code or zero for success.
 */
error_t
jstore_update_end (struct jstore_update *update);


/* The functions in JBD for accessing blocks are replaced here by
   "get" and "put" which are controlled by flags.  */

/* Flags for jstore_block_get.  Every flag has its own bit, so that a
   request for non-journalled data can be told apart from a request for
   the committed copy.  */
#define JSTORE_GET_CREATE	(1 << 0) /* For both data and metadata.  */
#define JSTORE_GET_COMMITTED	(1 << 1) /* Can be used only alone.  */
#define JSTORE_GET_DATA		(1 << 2) /* Non-journalled data.  */

/**
 * Get buffer for block that will be touched.
 *
 * @param update Journalled store update.
 * @param block Block number (in data store).
 * @param flags Details about what exactly we want (JSTORE_GET_*).
 * @return Store cache buffer (with reference).  The reference is
 * consumed by jstore_block_put.
 */
struct scache_buffer *
jstore_block_get (struct jstore_update *update,
		  scache_block_t block, int flags);

/* Flags accepted by jstore_block_put, one bit each.  */
#define JSTORE_PUT_NOTMODIFIED		0x1 /* "Release" in JBD.  */
#define JSTORE_PUT_FORGET		0x2 /* Ignore modifications. */
#define JSTORE_PUT_REVOKE		0x4 /* Becomes data */
#define JSTORE_PUT_CLEAN		0x8 /* No changes */

/**
 * Return buffer to journalled store update.
 *
 * Consumes buffer's reference.
 *
 * @param update Journalled store update.
 * @param buffer Returned store cache buffer.
 * @param flags Details about returning buffer (JSTORE_PUT_*).
 */
void
jstore_block_put (struct jstore_update *update,
		  struct scache_buffer *buffer, int flags);


#endif
/* Private definitions for journalled store
   Copyright (C) 2003, 2004 Ognyan Kulev

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
   USA
*/

#ifndef _JSTORE_PRIV_H
#define _JSTORE_PRIV_H

#include <stddef.h>
#include <string.h>
#include <cthreads.h>
#include <netinet/in.h>		/* For ntohl and htonl.  */
#include "jstore.h"
#include "jbd.h"

/* NOTE(review): presumably the number of credits a newly created
   transaction starts with; the XXX marks the value as provisional.  */
#define TRANSACTION_INITIAL_CREDITS	300 /* XXX */

struct jstore_transaction;

/* Element type of the `checkpointed_holes' array in struct jstore.
   NOTE(review): presumably BEGIN and END delimit a range of journal
   blocks and END_SEQUENCE is the id of the transaction ending the
   range -- confirm against checkpoint.c.  */
struct jstore_hole
{
  scache_block_t begin, end;
  tid_t end_sequence;
};

/**
 * Journalled store.
 */
struct jstore
{
  /** Lock for some of the fields.  The fields that require the lock
      are only those that can change during jstore lifetime.  */
  struct mutex lock;

  /** Condition fired when transaction status is changed.  */
  struct condition condition;

  /** If journal is open read-only, this is non-zero.  */
  int is_readonly;

  /** The store cache of the journal itself.  Does not change during
      jstore lifetime.  */
  struct scache *journal_scache;
  /** The store cache of the journalled store.  Does not change during
      jstore lifetime.  */
  struct scache *data_scache;

  /** Buffer for journal's superblock.  Does not change during jstore
      lifetime.  */
  struct scache_buffer *superblock_buffer;
  /** Buffer set that contains only the buffer of the superblock.  */
  struct scache_bufset *superblock_bufset;
  /** Pointer to journal's superblock.  Does not change during jstore
      lifetime.  */
  struct journal_superblock_s *superblock;

  /** Running transaction where all new updates go.  */
  struct jstore_transaction *running_transaction;
  /** Committing transaction which is currently written to journal.  */
  struct jstore_transaction *committing_transaction;
  /** List of checkpoint transactions.  Pointer to transaction is both
      key and value.  */
  hurd_ihash_t checkpointed;
  /** NOTE(review): the next three fields are undocumented; presumably
      an array of holes together with its used and allocated element
      counts -- confirm against checkpoint.c.  */
  struct jstore_hole *checkpointed_holes;
  scache_block_t checkpointed_holes_count;
  scache_block_t checkpointed_holes_size;

  /** The oldest transaction in journal.  This transaction can't be
      overwritten in journal yet because its metadata is not in data
      store yet.  */
  tid_t tail_sequence;		
  /** Next id to be granted for new transaction.  */
  tid_t next_sequence;

  /** The first unused block.  */
  scache_block_t head_block;
  /** The oldest still-used block.  */
  scache_block_t tail_block;
  /** Number of free unused blocks.  */
  scache_block_t free_nblocks;
  /** The first usable block.  */
  scache_block_t first_block;
  /** Last usable block + 1.  */
  scache_block_t last_block;

  /** Uncleared error from abort.  */
  int32_t err;

  /** All blocks that are in some journalled store's bufset.  Key is
      block number, value is buffer set that should be used by
      jstore_get.  To use jstore must be locked.  To change, both
      jstore and transaction of the buffer set must be locked (XXX: do
      we really need this?).  */
  hurd_ihash_t all_blocks;

  /** Buffers of the blocks in all_blocks.  Used to check whether
      all_blocks is valid at all.  For example, a buffer may be removed
      from a buffer set without us being notified, and errors would
      occur when the block is searched for.  */
  struct scache_bufset *all_blocks_bufset;
};

/**
 * Single update.  Part of transaction.
 */
struct jstore_update
{
  struct mutex lock;		/* For credits.  */

  /* Journalled store and transaction this update belongs to.  */
  struct jstore *jstore;
  struct jstore_transaction *transaction;

  /* Credits of this update; protected by `lock'.
     NOTE(review): presumably the number of blocks the update may still
     touch (see jstore_update_begin) -- confirm.  */
  scache_block_t credits;
};


/* Clients don't see transactions, so transaction interface is here.  */ 

/**
 * Transaction.  It consists of many jstore_update.
 */
struct jstore_transaction
{
  /** Lock for all fields except jstore (because it doesn't change).  */
  struct mutex lock;

  /** Condition for when ref_count is changed.  */
  struct condition condition;

  /** Journalled store.  */
  struct jstore *jstore;

  /** Sequence number of this transaction.  */
  tid_t sequence;

  /** Revoked blocks.  Key and value are both the block number.  */
  hurd_ihash_t revoked;

  /** Data buffers.  Used in running and committing transactions.  To
      use, you need jstore locked.  */
  struct scache_bufset *data_bufset;
  /** Metadata buffers.  Used in running and committing transactions,
      and in checkpoint transactions too.  These are shadows.  To use,
      you need jstore locked.  */
  struct scache_bufset *metadata_bufset;
  /** Committed metadata buffers.  Used in running transaction.  To
      use, you need jstore locked.  */
  struct scache_bufset *committed_bufset;
  /** Log buffers.  Used only in committing transaction, which is in
      one thread only and doesn't need locking.  */
  struct scache_bufset *log_bufset;
  /** XXX Hack.  NOTE(review): purpose is not documented; confirm
      against commit.c before relying on it.  */
  struct scache_bufset *shadow_metadata_bufset;

  /** Credit for this transaction.  Used in running transaction.  */
  scache_block_t credits;

  /** Reference count for this transaction.  Used in running
      transaction.  */
  int ref_count;

  /** Begin and end block of transaction in journal store.  Used in
      checkpointed transaction.  */
  scache_block_t begin, end;
};

/**
 * Create new transaction.
 *
 * @param jstore Journalled store where transaction will belong to.
 * @return New transaction.
 */
struct jstore_transaction *
_jstore_transaction_new (struct jstore *jstore);

/**
 * Delete transaction.
 *
 * @param transaction Transaction to be deleted.
 */
void
_jstore_transaction_delete (struct jstore_transaction *transaction);

/**
 * Return running transaction of given jstore.
 *
 * NOTE(review): presumably the returned transaction carries a
 * reference that must be released with _jstore_transaction_put --
 * confirm against transaction.c.
 *
 * @param jstore Journalled store.
 * @return Running transaction in the journalled store.
 */
struct jstore_transaction *
_jstore_get_running_transaction (struct jstore *jstore);

/**
 * Release reference to transaction.
 *
 * @param transaction Transaction with reference to release.
 */
void
_jstore_transaction_put (struct jstore_transaction *transaction);

/**
 * Clean up checkpointed transactions that have finished.
 *
 * @param jstore Journalled store.
 */
void
_jstore_cleanup_checkpointed (struct jstore *jstore);

/**
 * Commit running transaction.
 *
 * More precisely, schedule this transaction for committing.
 *
 * @param jstore Journalled store whose running transaction is committed.
 * @return Error code or zero for success.
 */
error_t
_jstore_commit (struct jstore *jstore);

/**
 * Prepare to shutdown journal or make it readonly.
 *
 * @param jstore Journalled store.
 */
void
_jstore_flush (struct jstore *jstore);

/**
 * Replay journal after crash.
 *
 * @param jstore Journalled store.
 * @return Error code or zero for success.
 */
error_t jstore_recover (struct jstore *jstore);
/**
 * Wipe journal store, preparing it for starting of journalling.
 *
 * @param jstore Journalled store.
 * @return Error code or zero for success.
 */
error_t jstore_wipe (struct jstore *jstore);
/**
 * Reset on-store data structures in journal.
 *
 * After that, the journal is ready for creating and committing
 * transactions.
 *
 * @param jstore Journalled store.
 * @return Error code or zero for success.
 */
error_t jstore_reset (struct jstore *jstore);

/**
 * Write changes in journal superblock to its store.
 *
 * @param jstore Journalled store.
 * @return Error code or zero for success.
 */
error_t jstore_update_superblock (struct jstore *jstore);

/* NOTE(review): the following pair is undocumented.  Presumably
   _jstore_jbuffer_get returns a buffer for a new journal block of type
   BLOCKTYPE and _jstore_jbuffer_put hands it back -- confirm against
   jbuffer.c.  */
struct scache_buffer *
_jstore_jbuffer_get (struct jstore *jstore, uint32_t blocktype);

error_t
_jstore_jbuffer_put (struct jstore *jstore, struct scache_buffer *jbuffer);


/**
 * Mark block as revoked.
 *
 * @param jstore Journalled store.
 * @param block Block number to mark.
 */
error_t _jstore_revoked_add (struct jstore *jstore,
			     scache_block_t block);

/**
 * Test if block is revoked.
 *
 * NOTE(review): the result is an error_t, so which value means
 * "revoked" is not visible here -- confirm against revoke.c.
 *
 * @param jstore Journalled store.
 * @param block Block number to test.
 */
error_t _jstore_revoked_find (struct jstore *jstore,
			      scache_block_t block);

/**
 * Unmark block as revoked.
 *
 * @param jstore Journalled store.
 * @param block Block number to unmark.
 */
error_t _jstore_revoked_remove (struct jstore *jstore,
				scache_block_t block);


/**
 * Panic after unrecoverable error.
 *
 * @param format Message; presumably a printf-style format consumed
 * together with the variable arguments -- confirm against panic.c.
 */
void jstore_panic (char *format, ...);

/**
 * Print error to stderr.
 *
 * @param format Message; presumably a printf-style format consumed
 * together with the variable arguments -- confirm against panic.c.
 */
void jstore_error (char *format, ...);

/* JSTORE_LOCK (O) and JSTORE_UNLOCK (O) lock and unlock the `lock'
   mutex member of the object O points to.

   When JSTORE_DEBUG_LOCK is defined, each operation is first traced to
   stdout with the address of O and the name of the calling function.
   The tracing variants include <stdio.h> themselves, so they do not
   depend on JSTORE_DEBUG_PRINTF (or the includer) having done so.  */
#ifdef JSTORE_DEBUG_LOCK
#include <stdio.h>		/* For printf.  */
#define JSTORE_LOCK(o) \
  do { \
    printf ("  LOCK %p in %s\n", (void *) (o), __FUNCTION__); \
    mutex_lock (&(o)->lock); \
  } while (0)
#define JSTORE_UNLOCK(o) \
  do { \
    printf ("UNLOCK %p in %s\n", (void *) (o), __FUNCTION__); \
    mutex_unlock (&(o)->lock); \
  } while (0)
#else
#define JSTORE_LOCK(o)		mutex_lock (&(o)->lock)
#define JSTORE_UNLOCK(o)	mutex_unlock (&(o)->lock)
#endif

/* Debug tracing.  When JSTORE_DEBUG_PRINTF is defined, JSTORE_PRINTF
   prints to stdout the calling function's name and a colon, then a
   space followed by the printf-style message ARGS, then a newline.
   ARGS, when not empty, must begin with a string literal because it is
   written directly after the " " literal.  Otherwise the macro expands
   to a no-op and ARGS is not evaluated.  */
#ifdef JSTORE_DEBUG_PRINTF
#include <stdio.h>
#define JSTORE_PRINTF(args...) \
  do { \
    printf ("%s:", __FUNCTION__); \
    printf (" " args); \
    printf ("\n"); \
  } while (0)
#else
#define JSTORE_PRINTF(args...) (void)0
#endif

#endif /* _JSTORE_PRIV_H */
# libe3diskfs is built as a libtool library.
lib_LTLIBRARIES = libe3diskfs.la
libe3diskfs_la_LDFLAGS = -version-info 0:0:0

# Lists of headers and sources.
# The source lists are grouped by file name prefix (file-* and dir-*,
# io-*, fsys-*, ifsock) and joined in SRCS below; everything without
# such a prefix is in OTHERSRCS.
FSSRCS= dir-chg.c dir-link.c dir-lookup.c dir-mkdir.c dir-mkfile.c \
	dir-readdir.c dir-rename.c dir-rmdir.c dir-unlink.c \
	file-access.c file-chauthor.c file-chflags.c file-chg.c \
	file-chmod.c file-chown.c file-exec.c file-get-fs-opts.c \
	file-get-trans.c file-get-transcntl.c file-getcontrol.c \
	file-getfh.c file-getlinknode.c file-lock-stat.c \
	file-lock.c file-set-size.c file-set-trans.c file-statfs.c \
	file-sync.c file-syncfs.c file-utimes.c file-reparent.c
IOSRCS= io-async-icky.c io-async.c io-duplicate.c io-get-conch.c io-revoke.c \
	io-map-cntl.c io-map.c io-modes-get.c io-modes-off.c \
	io-modes-on.c io-modes-set.c io-owner-mod.c io-owner-get.c \
	io-pathconf.c io-prenotify.c io-read.c io-readable.c io-identity.c \
	io-reauthenticate.c io-rel-conch.c io-restrict-auth.c io-seek.c \
	io-select.c io-stat.c io-stubs.c io-write.c io-version.c io-sigio.c
FSYSSRCS=fsys-getroot.c fsys-goaway.c fsys-startup.c fsys-getfile.c \
	fsys-options.c fsys-syncfs.c fsys-forward.c
IFSOCKSRCS=ifsock.c
OTHERSRCS = conch-fetch.c conch-set.c dir-clear.c dir-init.c dir-renamed.c \
	extern-inline.c \
	node-create.c node-drop.c node-make.c node-rdwr.c node-update.c \
	node-nref.c node-nput.c node-nrele.c node-nrefl.c node-nputl.c \
	node-nrelel.c \
	peropen-make.c peropen-rele.c protid-make.c protid-rele.c \
	init-init.c init-startup.c init-first.c init-main.c \
	rdwr-internal.c boot-start.c demuxer.c node-times.c shutdown.c \
	sync-interval.c sync-default.c \
	opts-set.c opts-get.c opts-std-startup.c opts-std-runtime.c \
        opts-append-std.c opts-common.c opts-runtime.c opts-version.c \
	trans-callback.c readonly.c readonly-changed.c \
	remount.c console.c disk-pager.c \
	name-cache.c direnter.c dirrewrite.c dirremove.c lookup.c dead-name.c \
	validate-mode.c validate-group.c validate-author.c validate-flags.c \
	validate-rdev.c validate-owner.c extra-version.c
SRCS = $(OTHERSRCS) $(FSSRCS) $(IOSRCS) $(FSYSSRCS) $(IFSOCKSRCS)
# Headers used only while building the library.
LCLHDRS = priv.h lithp.h fsmutations.h diskfs-pager.h fhandle.h

# Automake variables.
libe3diskfs_la_SOURCES = $(SRCS) protid-user.c
# Public header installed together with the library.
include_HEADERS = e3diskfs.h
# Headers that are distributed but not installed.  LCLHDRS already
# contains diskfs-pager.h, so it is not listed a second time here.
noinst_HEADERS = $(LCLHDRS)

# libe3diskfs depends on libe3pager.
libe3diskfs_la_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_srcdir)/libe3pager

# MiG stuff.

# RPC stubs generated by MiG; they are built but not distributed.
MIGSTUBS = fsServer.c ioServer.c fsysServer.c exec_startupServer.c \
	fsys_replyUser.c fs_notifyUser.c ifsockServer.c \
	startup_notifyServer.c
nodist_libe3diskfs_la_SOURCES = $(MIGSTUBS)
# Generated stub headers that must exist before the sources are compiled.
BUILT_SOURCES = fs_S.h fs_notify_U.h fsys_S.h fsys_reply_U.h exec_startup_S.h ifsock_S.h io_S.h

# Makefile.mig requires MIG to be set by the including Makefile.am.
MIG = @MIG@
# Per-interface CPP flags used when building server stubs and headers.
fsys_MIGSFLAGS = -imacros $(srcdir)/fsmutations.h -DREPLY_PORTS
fs_MIGSFLAGS = -imacros $(srcdir)/fsmutations.h
io_MIGSFLAGS = -imacros $(srcdir)/fsmutations.h
ifsock_MIGSFLAGS = -imacros $(srcdir)/fsmutations.h
# Flags passed to MiG itself when building server stubs and headers.
MIGCOMSFLAGS = -prefix diskfs_
include $(top_srcdir)/Makefile.mig
/* Definitions for fileserver helper functions
   Copyright (C) 2004 Ognyan Kulev
   Copyright (C) 1994,95,96,97,98,99,2001,02 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2, or (at
   your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */

#ifndef _HURD_DISKFS
#define _HURD_DISKFS

#include <assert.h>
#include <unistd.h>
#include <rwlock.h>
#include <hurd/ports.h>
#include <hurd/fshelp.h>
#include <hurd/iohelp.h>
#include <idvec.h>

/* Storage class of the inline functions defined in this header.  A
   file may define DISKFS_EXTERN_INLINE before including the header to
   override it.

   The functions rely on the traditional GNU meaning of `extern inline'
   (an inline-only definition that never emits an external symbol).
   When the compiler uses ISO C99 inline semantics, as it does with
   -std=gnu99 on newer GCC, that meaning must be requested explicitly
   with the gnu_inline attribute; otherwise every includer would emit
   its own external definition.  */
#ifndef DISKFS_EXTERN_INLINE
# ifdef __GNUC_STDC_INLINE__
#  define DISKFS_EXTERN_INLINE extern inline __attribute__ ((__gnu_inline__))
# else
#  define DISKFS_EXTERN_INLINE extern inline
# endif
#endif

/* Each RPC call is running in some "context".  All user functions
   work in some context.  Primary use is for holding update in
   transaction.  It's allocated by user, who may extend it.  */
struct rpc_context
{
  /* NOTE(review): presumably the credentials of the user port the RPC
     arrived on -- confirm.  */
  struct protid *cred;
};

/* Each user port referring to a file points to one of these
   (with the aid of the ports library).  */
struct protid
{
  struct port_info pi;		/* libports info block */

  /* User identification */
  struct iouser *user;

  /* Object this refers to */
  struct peropen *po;

  /* Shared memory I/O information.  */
  memory_object_t shared_object;
  struct shared_io *mapped;

  /* NOTE(review): undocumented; presumably free for use by the user of
     the library -- confirm.  */
  void *user_data;
};

/* One of these is created for each node opened by dir_lookup. */
struct peropen
{
  int filepointer;
  int lock_status;
  int refcnt;
  int openstat;

  /* The node that was opened.  */
  struct node *np;

  /* The parent of the translator's root node.  */
  mach_port_t root_parent;

  /* If this node is in a shadow tree, the parent of its root.  */
  mach_port_t shadow_root_parent;
  /* If in a shadow tree, its root node in this translator.  */
  struct node *shadow_root;
};

/* A unique one of these exists for each node currently in use (and
   possibly for some not currently in use, but with links) in the
   filesystem.  */
struct node
{
  struct node *next, **prevp;

  /* Format-specific part of the node; `struct disknode' is not
     defined in this header.  */
  struct disknode *dn;

  io_statbuf_t dn_stat;

  /* Stat has been modified if one of the following four fields
     is nonzero.  Also, if one of the dn_set_?time fields is nonzero,
     the appropriate dn_stat.st_?time field needs to be updated. */
  int dn_set_ctime;
  int dn_set_atime;
  int dn_set_mtime;
  int dn_stat_dirty;

  struct mutex lock;

  int references;		/* hard references */
  int light_references;		/* light references */

  mach_port_t sockaddr;		/* address for S_IFSOCK shortcut */

  int owner;

  struct transbox transbox;

  struct lock_box userlock;

  struct conch conch;

  /* Pending directory modification requests (see struct modreq).  */
  struct modreq *dirmod_reqs;
  unsigned int dirmod_tick;

  /* Pending file modification requests (see struct modreq).  */
  struct modreq *filemod_reqs;
  unsigned int filemod_tick;

  loff_t allocsize;

  ino_t cache_id;

  int author_tracks_uid;
};

/* Possible lookup types for the diskfs_lookup call.  */
enum lookup_type
{
  LOOKUP = 0,
  CREATE = 1,
  REMOVE = 2,
  RENAME = 3,
};

/* Pending directory and file modification request.  Requests form a
   singly linked list through `next'.  */
struct modreq
{
  mach_port_t port;
  struct modreq *next;
};


/* Special flag for diskfs_lookup.  It may be or'd into the lookup type
   for lookups of `..'.  */
#define SPEC_DOTDOT 0x10000000

struct argp;			/* opaque in this file */
struct argp_child;		/* opaque in this file */
struct store;			/* opaque in this file */
struct store_parsed;		/* opaque in this file */

/* Declarations of variables the library sets.  */

extern mach_port_t diskfs_default_pager; /* send right */
extern auth_t diskfs_auth_server_port; /* send right */

/* The io_identity identity port for the filesystem. */
extern mach_port_t diskfs_fsys_identity;

/* The command line diskfs was started, set by the default argument parser.
   If you don't use it, set this yourself.  This is only used for bootstrap
   file systems, to give the procserver.  */
extern char **diskfs_argv;

/* When this is a bootstrap filesystem, the multiboot kernel command
   line passed from the kernel.  If not a bootstrap filesystem, it is
   0.  As such, it can be used to distinguish between the two cases.
   Note: this is only valid after the arguments have been parsed by,
   for example, diskfs_init_main.  */
extern const char *diskfs_boot_command_line;
#define diskfs_boot_filesystem()	(diskfs_boot_command_line != 0)

/* When this is a bootstrap filesystem, nonzero if starting each bootstrap
   program should pause for a keystroke, for debugging purposes.  */
extern int _diskfs_boot_pause;

/* Name of the init program run when this is a bootstrap filesystem.  */
extern const char *diskfs_boot_init_program;

/* Hold this lock while doing fsys level operations.  Innocuous users can
   just hold a reader lock, and anyone who's going to do nasty things that
   would screw anyone else should hold a writer lock.  */
extern struct rwlock diskfs_fsys_lock;

/* NOTE(review): undocumented; presumably the mapped system time used
   for time stamps -- confirm.  */
extern volatile struct mapped_time_value *diskfs_mtime;

/* True iff we should do every operation synchronously.  It
   is the format-specific code's responsibility to keep allocation
   information permanently in sync if this is set; the rest will
   be done by format independent code. */
extern int diskfs_synchronous;

/* NOTE(review): undocumented; presumably protects the `references'
   and `light_references' members of struct node -- confirm.  */
extern spin_lock_t diskfs_node_refcnt_lock;

extern int pager_port_type;

/* Whether the filesystem is currently writable or not. */
extern int diskfs_readonly;


struct pager;

/* Port classes we manage */
extern struct port_class *diskfs_protid_class;
extern struct port_class *diskfs_control_class;
extern struct port_class *diskfs_execboot_class;
extern struct port_class *diskfs_initboot_class;
extern struct port_class *diskfs_shutdown_notification_class;

extern struct port_bucket *diskfs_port_bucket;



/* Declarations of things the user must or may define.  */

/* The user must define this type.  This should hold information
   between calls to diskfs_lookup and diskfs_dir{enter,rewrite,rename}
   so that those calls work as described below.  */
struct dirstat;

/* The user must define this variable; it should be the size in bytes
   of a struct dirstat. */
extern const size_t diskfs_dirstat_size;

/* The user must define this variable; it is the maximum number of
   links to any one file.  The implementation of dir_rename does not know
   how to succeed if this is only one; on such formats you need to
   reimplement dir_rename yourself.  */
extern nlink_t diskfs_link_max;

/* The user must define this variable; it is the maximum length of
   a single pathname component (i.e. file name within directory).
   The filesystem code does not use this for anything, but it is
   returned to user queries for _PC_NAME_MAX.
   NOTE(review): nlink_t is an unusual type for a name length and looks
   copied from diskfs_link_max; changing it would have to be done
   together with every definition of the variable.  */
extern nlink_t diskfs_name_max;

/* The user must define this variable; it is the maximum number of
   symlinks to be traversed within a single call to dir_lookup.
   If this is exceeded, dir_lookup will return ELOOP.  */
extern int diskfs_maxsymlinks;

/* This variable is defined by diskfs; the user should set it if
   the filesystem media cannot be made writeable. */
extern int diskfs_hard_readonly;

/* The user must define this variable.  Set this to be the node
   of root of the filesystem.  */
extern struct node *diskfs_root_node;

/* The user must define this variable.  Set this to the name of the
   filesystem server. */
extern char *diskfs_server_name;

/* The user must define this variable.  Set this to be the server
   version number.  */
extern char *diskfs_server_version;

/* The user may define this variable.  Set this to be any additional
   version specification that should be printed for --version. */
extern char *diskfs_extra_version;

/* NOTE(review): the five diskfs_shortcut_* variables below are declared
   without `extern', i.e. as tentative definitions in every file that
   includes this header.  That only links when the toolchain merges
   common symbols (-fcommon).  Making them `extern' requires a single
   definition to exist in the library, which cannot be verified from
   this header.  */

/* The user may define this variable.  This should be nonzero iff the
   filesystem format supports shortcutting symlink translation.
   The library guarantees that users will not be able to read or write
   the contents of the node directly, and the library will only do so
   if the symlink hook functions return EINVAL or are not defined.
   The library knows that the dn_stat.st_size field is the length of
   the symlink, even if the hook functions are used. */
int diskfs_shortcut_symlink;

/* The user may define this variable.  This should be nonzero iff the
   filesystem format supports shortcutting chrdev translation.  */
int diskfs_shortcut_chrdev;

/* The user may define this variable.  This should be nonzero iff the
   filesystem format supports shortcutting blkdev translation.  */
int diskfs_shortcut_blkdev;

/* The user may define this variable.  This should be nonzero iff the
   filesystem format supports shortcutting fifo translation.  */
int diskfs_shortcut_fifo;

/* The user may define this variable.  This should be nonzero iff the
   filesystem format supports shortcutting ifsock translation. */
int diskfs_shortcut_ifsock;

/* The user may define this variable, otherwise it has a default value of 30.
   diskfs_set_sync_interval is called with this value when the first diskfs
   thread is started up (in diskfs_spawn_first_thread).   */
extern int diskfs_default_sync_interval;

/* The user must define this variable, which should be a string that somehow
   identifies the particular disk this filesystem is interpreting.  It is
   generally only used to print messages or to distinguish instances of the
   same filesystem type from one another.  If this filesystem accesses no
   external media, then define this to be 0.  */
extern char *diskfs_disk_name;

/* The user must define this function.  Set *STATFSBUF with
   appropriate values to reflect the current state of the filesystem.
   The buffer will be initialized to all zeros by the caller;
   the caller will set f_namelen to diskfs_name_max.
   RPC_CONTEXT is the context the call is made in (see struct
   rpc_context).  */
error_t diskfs_set_statfs (fsys_statfsbuf_t *statfsbuf,
			   struct rpc_context *rpc_context);

/* The user must define this function.  Lookup in directory DP (which
   is locked) the name NAME.  TYPE will either be LOOKUP, CREATE,
   RENAME, or REMOVE.  CRED identifies the user making the call.
   RPC_CONTEXT is the context the call is made in (see struct
   rpc_context).

   If the name is found, return zero, and (if NP is nonzero) set *NP
   to point to the node for it, locked.  If the name is not found,
   return ENOENT, and (if NP is nonzero) set *NP to zero.  If NP is
   zero, then the node found must not be locked, even transitorily.
   Lookups for REMOVE and RENAME (which must often check permissions
   on the node being found) will always set NP.

   If DS is nonzero then:
     For LOOKUP: set *DS to be ignored by diskfs_drop_dirstat.
     For CREATE: on success, set *DS to be ignored by diskfs_drop_dirstat.
                 on failure, set *DS for a future call to diskfs_direnter.
     For RENAME: on success, set *DS for a future call to diskfs_dirrewrite.
                 on failure, set *DS for a future call to diskfs_direnter.
     For REMOVE: on success, set *DS for a future call to diskfs_dirremove.
                 on failure, set *DS to be ignored by diskfs_drop_dirstat.
   The caller of this function guarantees that if DS is nonzero, then
   either the appropriate call listed above or diskfs_drop_dirstat will
   be called with DS before the directory DP is unlocked, and guarantees
   that no lookup calls will be made on this directory between this
   lookup and the use (or destruction) of *DS.

   If you use the library's versions of diskfs_rename_dir,
   diskfs_clear_directory, and diskfs_init_dir, then lookups for `..'
   might have the flag SPEC_DOTDOT or'd in.  This has the following special
   meaning:
   For LOOKUP: DP should be unlocked and its reference dropped before
               returning.
   For RENAME and REMOVE: The node being found (*NP) is already held
               locked, so don't lock it or add a reference to it.
   (SPEC_DOTDOT will not be given with CREATE.)

   Return ENOENT if NAME isn't in the directory.
   Return EAGAIN if NAME refers to the `..' of this filesystem's root.
   Return EIO if appropriate.
*/
error_t diskfs_lookup_hard (struct node *dp,
			    const char *name, enum lookup_type type,
			    struct node **np, struct dirstat *ds,
			    struct protid *cred,
			    struct rpc_context *rpc_context);

/* The user must define this function.  Add NP to directory DP
   under the name NAME.  This will only be called after an
   unsuccessful call to diskfs_lookup of type CREATE or RENAME; DP
   has been locked continuously since that call and DS is as that call
   set it, NP is locked.   CRED identifies the user responsible
   for the call (to be used only to validate directory growth).
   CONTEXT is the context the call is made in (see struct
   rpc_context).  */
error_t diskfs_direnter_hard (struct node *dp, const char *name,
			      struct node *np, struct dirstat *ds,
			      struct protid *cred,
			      struct rpc_context *context);

/* The user must define this function.  This will only be called after
   a successful call to diskfs_lookup of type RENAME; this call should change
   the name found in directory DP to point to node NP instead of its previous
   referent.  DP has been locked continuously since the call to diskfs_lookup
   and DS is as that call set it; NP is locked.  CONTEXT is the context
   the call is made in (see struct rpc_context).  */
error_t diskfs_dirrewrite_hard (struct node *dp, struct node *np,
				struct dirstat *ds,
				struct rpc_context *context);

/* The user must define this function.  This will only be called after a
   successful call to diskfs_lookup of type REMOVE; this call should remove
   the name found from the directory DP.  DP has been locked continuously since
   the call to diskfs_lookup and DS is as that call set it.  RPC_CONTEXT
   is the context the call is made in (see struct rpc_context).  */
error_t diskfs_dirremove_hard (struct node *dp, struct dirstat *ds,
			       struct rpc_context *rpc_context);

/* The user must define this function.  Initialize DS such that
   diskfs_drop_dirstat will ignore it.  CONTEXT is the rpc_context for
   this operation (presumably created by diskfs_rpc_context_new).  */
void diskfs_null_dirstat (struct dirstat *ds,
			  struct rpc_context *context);

/* The user must define this function.  DS has been set by a previous
   call to diskfs_lookup on directory DP; this function is
   guaranteed to be called if none of
   diskfs_dir{enter,remove,rewrite} is, and should free any state
   retained by a struct dirstat.  DP has been locked continuously since
   the call to diskfs_lookup.  CONTEXT is the rpc_context for this
   operation (presumably created by diskfs_rpc_context_new).  */
error_t diskfs_drop_dirstat (struct node *dp, struct dirstat *ds,
			     struct rpc_context *context);

/* The user must define this function.  Return N directory entries
   starting at ENTRY from locked directory node DP.  Fill *DATA with
   the entries; that pointer currently points to *DATACNT bytes.  If
   it isn't big enough, vm_allocate into *DATA.  Set *DATACNT with the
   total size used.  Fill *AMT with the number of entries copied.
   Regardless, never copy more than BUFSIZ bytes.  If BUFSIZ is 0,
   then there is no limit on *DATACNT; if N is -1, then there is no limit
   on *AMT.  CONTEXT is the rpc_context for this operation (presumably
   created by diskfs_rpc_context_new).  */
error_t diskfs_get_directs (struct node *dp, int entry, int n,
			    char **data, size_t *datacnt,
			    vm_size_t bufsiz, int *amt,
			    struct rpc_context *context);

/* The user must define this function.  For locked node NP (for which
   diskfs_node_translated is true) look up the name of its translator.
   Store the name into newly malloced storage (presumably returned
   through *NAMEP); set *NAMELEN to the total length.  CONTEXT is the
   rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
error_t diskfs_get_translator (struct node *np, char **namep, u_int *namelen,
			       struct rpc_context *context);

/* The user must define this function.  For locked node NP, set
   the name of the translating program to be NAME, length NAMELEN.  CRED
   identifies the user responsible for the call.  CONTEXT is the
   rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
error_t diskfs_set_translator (struct node *np,
			       const char *name, u_int namelen,
			       struct protid *cred,
			       struct rpc_context *context);

/* The user must define this function.  Truncate locked node NP to be SIZE
   bytes long.  (If NP is already less than or equal to SIZE bytes
   long, do nothing.)  If this is a symlink (and diskfs_shortcut_symlink
   is set) then this should clear the symlink, even if
   diskfs_create_symlink_hook stores the link target elsewhere.
   CONTEXT is the rpc_context for this operation (presumably created
   by diskfs_rpc_context_new).  */
error_t diskfs_truncate (struct node *np, loff_t size,
			 struct rpc_context *context);

/* The user must define this function.  Grow the disk allocated to locked node
   NP to be at least SIZE bytes, and set NP->allocsize to the actual
   allocated size.  (If the allocated size is already SIZE bytes, do
   nothing.)  CRED identifies the user responsible for the call.
   CONTEXT is the rpc_context for this operation (presumably created
   by diskfs_rpc_context_new).  */
error_t diskfs_grow (struct node *np, loff_t size, struct protid *cred,
		     struct rpc_context *context);

/* The user must define this function.  Write to disk (synchronously
   iff WAIT is nonzero) from format-specific buffers any non-paged
   metadata.  If CLEAN is nonzero, then after this is written the
   filesystem will be absolutely clean, and the non-paged metadata can
   so indicate.  CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
error_t diskfs_set_hypermetadata (int wait, int clean,
				  struct rpc_context *context);

/* The user must define this function.  Allocate a new node to be of
   mode MODE in locked directory DP (don't actually set the mode or
   modify the dir, that will be done by the caller); the user
   responsible for the request can be identified with CRED.  Set *NP
   to be the newly allocated node.  CONTEXT is the rpc_context for
   this operation (presumably created by diskfs_rpc_context_new).

   NOTE(review): this prototype has no CRED parameter, so the
   reference to CRED above looks stale -- confirm how the requesting
   user is meant to be identified.  */
error_t diskfs_alloc_node (struct node *dp, mode_t mode, struct node **np,
			   struct rpc_context *context);

/* Free node NP; the on disk copy has already been synced with
   diskfs_node_update (where NP->dn_stat.st_mode was 0).  Its
   mode used to be MODE.  CONTEXT is the rpc_context for this
   operation (presumably created by diskfs_rpc_context_new).  */
void diskfs_free_node (struct node *np, mode_t mode,
		       struct rpc_context *context);

/* There are no more hard links to NP in the filesystem but it's used
   by some process.  NP must be locked.  CONTEXT is the rpc_context for
   this operation (presumably created by diskfs_rpc_context_new).  */
error_t diskfs_orphan_node (struct node *np, struct rpc_context *context);

/* Node NP has no more references; free local state, including *NP
   if it isn't to be retained.  diskfs_node_refcnt_lock is held.
   CONTEXT is the rpc_context for this operation (presumably created
   by diskfs_rpc_context_new).  */
void diskfs_node_norefs (struct node *np,
			 struct rpc_context *context);

/* The user must define this function.  Node NP has some light
   references, but has just lost its last hard reference.  Take steps
   so that if any light references can be freed, they are.  NP is locked
   as is the pager refcount lock.  This function will be called after
   diskfs_lost_hardrefs.  CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
void diskfs_try_dropping_softrefs (struct node *np,
				   struct rpc_context *context);

/* The user must define this function.  Node NP has some light
   references but has just lost its last hard reference.  NP is locked.
   CONTEXT is the rpc_context for this operation (presumably created
   by diskfs_rpc_context_new).  */
void diskfs_lost_hardrefs (struct node *np,
			   struct rpc_context *context);

/* The user must define this function.  Node NP has just acquired
   a hard reference where it had none previously.  It is thus now
   OK again to have light references without real users.  NP is
   locked.  CONTEXT is the rpc_context for this operation (presumably
   created by diskfs_rpc_context_new).  */
void diskfs_new_hardrefs (struct node *np,
			  struct rpc_context *context);

/* The user must define this function.  Return non-zero if locked
   directory DP is empty.  If the user does not redefine
   diskfs_clear_directory and diskfs_init_directory, then `empty'
   means `possesses entries labelled . and .. only'.  CRED
   identifies the user making the call (if this user can't search
   the directory, then this routine should fail).  CONTEXT is the
   rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
int diskfs_dirempty (struct node *dp, struct protid *cred,
		     struct rpc_context *context);

/* The user may define this function.  Return 0 if NP's mode can be
   changed to MODE; otherwise return an error code.  It must always be
   possible to clear the mode; diskfs will not ask for permission
   before doing so.  CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
error_t diskfs_validate_mode_change (struct node *np, mode_t mode,
				     struct rpc_context *context);

/* The user may define this function.  Return 0 if NP's owner can be
   changed to UID; otherwise return an error code.  CONTEXT is the
   rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
error_t diskfs_validate_owner_change (struct node *np, uid_t uid,
				      struct rpc_context *context);

/* The user may define this function.  Return 0 if NP's group can be
   changed to GID; otherwise return an error code.  CONTEXT is the
   rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
error_t diskfs_validate_group_change (struct node *np, gid_t gid,
				      struct rpc_context *context);

/* The user may define this function.  Return 0 if NP's author can be
   changed to AUTHOR; otherwise return an error code.  CONTEXT is the
   rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
error_t diskfs_validate_author_change (struct node *np, uid_t author,
				       struct rpc_context *context);

/* The user may define this function.  Return 0 if NP's flags can be
   changed to FLAGS; otherwise return an error code.  It must always
   be possible to clear the flags.  CONTEXT is the rpc_context for
   this operation (presumably created by diskfs_rpc_context_new).  */
error_t diskfs_validate_flags_change (struct node *np, int flags,
				      struct rpc_context *context);

/* The user may define this function.  Return 0 if NP's rdev can be
   changed to RDEV; otherwise return an error code.  CONTEXT is the
   rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
error_t diskfs_validate_rdev_change (struct node *np, dev_t rdev,
				     struct rpc_context *context);

/* The user must define this function.  Sync the info in NP->dn_stat
   and any associated format-specific information to disk.  If WAIT is true,
   then return only after the physical media has been completely updated.
   CONTEXT is the rpc_context for this operation (presumably created
   by diskfs_rpc_context_new).  */
void diskfs_write_disknode (struct node *np, int wait,
			    struct rpc_context *context);

/* The user must define this function.  Sync the file contents and all
   associated meta data of file NP to disk (generally this will involve
   calling diskfs_node_update for much of the metadata).  If WAIT is true,
   then return only after the physical media has been completely updated.
   CONTEXT is the rpc_context for this operation (presumably created
   by diskfs_rpc_context_new).  */
void diskfs_file_update (struct node *np, int wait,
			 struct rpc_context *context);

/* The user must define this function.  For each active node, call
   FUN.  The node is to be locked around the call to FUN.  If FUN
   returns non-zero for any node, then immediately stop, and return
   that value.  FUN takes the node and a struct rpc_context *
   (presumably CONTEXT is passed through to it -- confirm against the
   implementation).  CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
error_t diskfs_node_iterate (error_t (*fun)(struct node *,
					    struct rpc_context *),
			     struct rpc_context *context);

/* The user must define this function.  Sync all the pagers and any
   data belonging on disk except for the hypermetadata.  If WAIT is true,
   then return only after the physical media has been completely updated.
   CONTEXT is the rpc_context for this operation (presumably created
   by diskfs_rpc_context_new).  */
void diskfs_sync_everything (int wait,
			     struct rpc_context *context);

/* Shut down all pagers; this is done when the filesystem is exiting and is
   irreversible.  CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
void diskfs_shutdown_pager (struct rpc_context *context);

/* The user must define this function.  Return a memory object port (send
   right) for the file contents of NP.  PROT is the maximum allowable
   access.  On errors, return MACH_PORT_NULL and set errno.  CONTEXT is
   the rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
mach_port_t diskfs_get_filemap (struct node *np, vm_prot_t prot,
				struct rpc_context *context);

/* The user must define this function.  Return true if there are pager
   ports exported that might be in use by users.  If this returns false, then
   further pager creation is also blocked.  CONTEXT is the rpc_context
   for this operation (presumably created by diskfs_rpc_context_new).  */
int diskfs_pager_users (struct rpc_context *context);

/* Return the bitwise or of the maximum prot parameter (the second arg to
   diskfs_get_filemap) for all active user pagers.  CONTEXT is the
   rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
vm_prot_t diskfs_max_user_pager_prot (struct rpc_context *context);

/* The user must define this function.  Return a `struct pager *' suitable
   for use as an argument to diskfs_register_memory_fault_area that
   refers to the pager returned by diskfs_get_filemap for node NP.
   NP is locked.  CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
struct pager *diskfs_get_filemap_pager_struct (struct node *np,
					       struct rpc_context *context);

/* The user may define this function.  It is called when the disk has been
   changed from read-only to read-write mode or vice-versa.  READONLY is the
   new state (which is also reflected in DISKFS_READONLY).  This function is
   also called during initial startup if the filesystem is to be writable.
   CONTEXT is the rpc_context for this operation (presumably created
   by diskfs_rpc_context_new).  */
void diskfs_readonly_changed (int readonly, struct rpc_context *context);

/* The user must define this function.  It must invalidate all cached global
   state, and re-read it as necessary from disk, without writing anything.
   It is always called with DISKFS_READONLY true.  diskfs_node_reload is
   subsequently called on all active nodes, so this call needn't re-read any
   node-specific data.  CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
error_t diskfs_reload_global_state (struct rpc_context *context);

/* The user must define this function.  It must re-read all data specific to
   NODE from disk, without writing anything.  It is always called with
   DISKFS_READONLY true.  CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
error_t diskfs_node_reload (struct node *node, struct rpc_context *context);

/* If this function pointer is nonzero (and diskfs_shortcut_symlink is
   set) it is called to set a symlink on NP with contents TARGET.  If it
   returns EINVAL or isn't set, then the normal method (writing the
   contents into the file data) is used.  If it returns any other error,
   it is returned to the user.  CONTEXT is the rpc_context for this
   operation (presumably created by diskfs_rpc_context_new).

   NOTE(review): this is an object declaration without `extern', so
   every file including this header gets a tentative definition; that
   only links when the toolchain merges common symbols.  Confirm
   whether it should be `extern' with one definition in the library.  */
error_t (*diskfs_create_symlink_hook)(struct node *np, const char *target,
				      struct rpc_context *context);

/* If this function pointer is nonzero (and diskfs_shortcut_symlink is
   set) it is called to read the contents of the symlink NP (presumably
   into the buffer TARGET -- confirm the required buffer size against
   the callers).  If it returns EINVAL or isn't set, then the normal
   method (reading from the file data) is used.  If it returns any other
   error, it is returned to the user.  CONTEXT is the rpc_context for
   this operation (presumably created by diskfs_rpc_context_new).

   NOTE(review): this is an object declaration without `extern', so
   every file including this header gets a tentative definition; that
   only links when the toolchain merges common symbols.  Confirm
   whether it should be `extern' with one definition in the library.  */
error_t (*diskfs_read_symlink_hook)(struct node *np, char *target,
				    struct rpc_context *context);

/* The library exports the following functions for general use */

/* Call this after arguments have been parsed to initialize the library.
   You must call this before calling any other diskfs functions, and after
   parsing diskfs options.  */
error_t diskfs_init_diskfs (void);

/* Call this once the filesystem is fully initialized, to advertise the new
   filesystem control port to our parent filesystem.  If BOOTSTRAP is set,
   the diskfs will call fsys_startup on that port as appropriate and return
   the REALNODE returned in that call; otherwise we return MACH_PORT_NULL.
   FLAGS specifies how to open REALNODE (from the O_* set).  */
mach_port_t diskfs_startup_diskfs (mach_port_t bootstrap, int flags);

/* Call this after all format-specific initialization is done (except
   for setting diskfs_root_node); at this point the pagers should be
   ready to go.  DEMUXER is the demuxer to use.  Normally, this is
   just diskfs_demuxer.  */
void diskfs_spawn_first_thread (ports_demuxer_type demuxer);

/* Once diskfs_root_node is set, call this if we are a bootstrap
   filesystem.  If you call this, then the library will call
   diskfs_init_completed once it has a valid proc and auth port.
   Takes no arguments; declared with (void) so that the compiler
   checks calls against a real prototype.  */
void diskfs_start_bootstrap (void);

/* Node NP now has no more references; clean all state.  The
   _diskfs_node_refcnt_lock must be held, and will be released
   upon return.  NP must be locked.  CONTEXT is the rpc_context for
   this operation (presumably created by diskfs_rpc_context_new).  */
void diskfs_drop_node (struct node *np, struct rpc_context *context);

/* Set on disk fields from NP->dn_stat; update ctime, atime, and mtime
   if necessary.  If WAIT is true, then return only after the physical
   media has been completely updated.  CONTEXT is the rpc_context for
   this operation (presumably created by diskfs_rpc_context_new).  */
void diskfs_node_update (struct node *np, int wait,
			 struct rpc_context *context);

/* Add a hard reference to node NP.  If there were no hard
   references previously, then the node cannot be locked
   (because you must hold a hard reference to hold the lock).
   CONTEXT is the rpc_context for this operation (presumably created
   by diskfs_rpc_context_new).  */
void diskfs_nref (struct node *np, struct rpc_context *context);

/* Unlock node NP and release a hard reference; if this is the last
   hard reference and there are no links to the file then request
   soft references to be dropped.  CONTEXT is the rpc_context for this
   operation (presumably created by diskfs_rpc_context_new).  */
void diskfs_nput (struct node *np, struct rpc_context *context);

/* Release a hard reference on NP.  If NP is locked by anyone, then
   this cannot be the last hard reference (because you must hold a
   hard reference in order to hold the lock).  If this is the last
   hard reference and there are no links, then request soft references
   to be dropped.  CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
void diskfs_nrele (struct node *np, struct rpc_context *context);

/* Add a light reference to node NP.  CONTEXT is the rpc_context for
   this operation (presumably created by diskfs_rpc_context_new).  */
void diskfs_nref_light (struct node *np, struct rpc_context *context);

/* Unlock node NP and release a light reference.  CONTEXT is the
   rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
void diskfs_nput_light (struct node *np, struct rpc_context *context);

/* Release a light reference on NP.  If NP is locked by anyone, then
   this cannot be the last reference (because you must hold a
   hard reference in order to hold the lock).  CONTEXT is the
   rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
void diskfs_nrele_light (struct node *np, struct rpc_context *context);

/* Reading and writing of files.  This is called by other filesystem
   routines and handles extension of files automatically.  NP is the
   node to be read or written, and must be locked.  DATA will be
   written or filled.  OFF identifies where in the file the I/O is to
   take place (-1 is not allowed).  AMT is the size of DATA and tells
   how much to copy.  DIR is 1 for writing and 0 for reading.  CRED is
   the user doing the access (only used to validate attempted file
   extension).  For reads, *AMTREAD is filled with the amount actually
   read.  CONTEXT is the rpc_context for this operation (presumably
   created by diskfs_rpc_context_new).  */
error_t
diskfs_node_rdwr (struct node *np, char *data, loff_t off,
		  size_t amt, int dir, struct protid *cred,
		  size_t *amtread, struct rpc_context *context);


/* Send notifications to users who have requested them with
   dir_notice_changes for directory DP.  The type of modification and
   affected name are TYPE and NAME respectively.  This should be
   called by diskfs_direnter, diskfs_dirremove, and diskfs_dirrewrite,
   and anything else that changes the directory, after the change is
   fully completed.  CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
void
diskfs_notice_dirchange (struct node *dp, enum dir_changed_type type,
			 const char *name, struct rpc_context *context);

/* Send notifications to users who have requested them with
   file_notice_changes for file NP.  The type of modification is TYPE.
   START and END identify the affected region of the file's data.
   This should be called after the change is fully completed.
   CONTEXT is the rpc_context for this operation (presumably created
   by diskfs_rpc_context_new).  */
void
diskfs_notice_filechange (struct node *np, enum file_changed_type type,
			  loff_t start, loff_t end,
			  struct rpc_context *context);

/* Create a new node structure with DN as its physical disknode.
   The new node will have one hard reference and no light references.
   CONTEXT is the rpc_context for this operation (presumably created
   by diskfs_rpc_context_new).  */
struct node *diskfs_make_node (struct disknode *dn,
			       struct rpc_context *context);


/* The library also exports the following functions; they are not generally
   useful unless you are redefining other functions the library provides. */

/* Lookup in directory DP (which is locked) the name NAME.  TYPE will
   either be LOOKUP, CREATE, RENAME, or REMOVE.  CRED identifies the
   user making the call.  CONTEXT is the rpc_context for this
   operation (presumably created by diskfs_rpc_context_new).

   NAME will have leading and trailing slashes stripped.  It is an
   error if there are internal slashes.  NAME will be modified in
   place if there are slashes in it; it is therefore an error to
   specify a constant NAME which contains slashes.

   If the name is found, return zero, and (if NP is nonzero) set *NP
   to point to the node for it, locked.  If the name is not found,
   return ENOENT, and (if NP is nonzero) set *NP to zero.  If NP is
   zero, then the node found must not be locked, even transitorily.
   Lookups for REMOVE and RENAME (which must often check permissions
   on the node being found) will always set NP.

   If DS is nonzero then:
     For LOOKUP: set *DS to be ignored by diskfs_drop_dirstat.
     For CREATE: on success, set *DS to be ignored by diskfs_drop_dirstat.
                 on failure, set *DS for a future call to diskfs_direnter.
     For RENAME: on success, set *DS for a future call to diskfs_dirrewrite.
                 on failure, set *DS for a future call to diskfs_direnter.
     For REMOVE: on success, set *DS for a future call to diskfs_dirremove.
                 on failure, set *DS to be ignored by diskfs_drop_dirstat.
   The caller of this function guarantees that if DS is nonzero, then
   either the appropriate call listed above or diskfs_drop_dirstat will
   be called with DS before the directory DP is unlocked, and guarantees
   that no lookup calls will be made on this directory between this
   lookup and the use (or destruction) of *DS.

   If you use the library's versions of diskfs_rename_dir,
   diskfs_clear_directory, and diskfs_init_dir, then lookups for `..'
   might have the flag SPEC_DOTDOT or'd in.  This has the following special
   meaning:
   For LOOKUP: DP should be unlocked and its reference dropped before
               returning.
   For RENAME and REMOVE: The node being found (*NP) is already held
               locked, so don't lock it or add a reference to it.
   (SPEC_DOTDOT will not be given with CREATE.)

   Return ENOTDIR if DP is not a directory.
   Return EACCES if CRED isn't allowed to search DP.
   Return EACCES if completing the operation will require writing
   the directory and diskfs_checkdirmod won't allow the modification.
   Return ENOENT if NAME isn't in the directory.
   Return EAGAIN if NAME refers to the `..' of this filesystem's root.
   Return EIO if appropriate.

   This function is a wrapper for diskfs_lookup_hard.
*/
error_t diskfs_lookup (struct node *dp,
		       char *name, enum lookup_type type,
		       struct node **np, struct dirstat *ds,
		       struct protid *cred, struct rpc_context *context);

/* Add NP to directory DP under the name NAME.  This will only be
   called after an unsuccessful call to diskfs_lookup of type CREATE
   or RENAME; DP has been locked continuously since that call and DS
   is as that call set it; NP is locked.  CRED identifies the user
   responsible for the call (to be used only to validate directory
   growth).  CONTEXT is the rpc_context for this operation (presumably
   created by diskfs_rpc_context_new).  This function is a wrapper
   for diskfs_direnter_hard.  */
error_t
diskfs_direnter (struct node *dp, const char *name, struct node *np,
		 struct dirstat *ds, struct protid *cred,
		 struct rpc_context *context);

/* This will only be called after a successful call to diskfs_lookup
   of type RENAME; this call should change the name found in directory
   DP to point to node NP instead of its previous referent, OLDNP.  DP
   has been locked continuously since the call to diskfs_lookup and DS
   is as that call set it; NP is locked.  This routine should call
   diskfs_notice_dirchange if DP->dirmod_reqs is nonzero.  NAME is the
   name of OLDNP inside DP; it is this reference which is being
   rewritten.  CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  This function is
   a wrapper for diskfs_dirrewrite_hard.  */
error_t diskfs_dirrewrite (struct node *dp, struct node *oldnp,
			   struct node *np, const char *name,
			   struct dirstat *ds,
			   struct rpc_context *context);

/* This will only be called after a successful call to diskfs_lookup
   of type REMOVE; this call should remove the name found from the
   directory DP.  DP has been locked continuously since the call to
   diskfs_lookup and DS is as that call set it.  This routine should
   call diskfs_notice_dirchange if DP->dirmod_reqs is nonzero.  This
   function is a wrapper for diskfs_dirremove_hard.  The entry being
   removed has name NAME and refers to NP.  CONTEXT is the rpc_context
   for this operation (presumably created by diskfs_rpc_context_new).  */
error_t diskfs_dirremove (struct node *dp, struct node *np,
			  const char *name, struct dirstat *ds,
			  struct rpc_context *context);

/* Return the node corresponding to CACHE_ID in *NPP.  CONTEXT is the
   rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
error_t diskfs_cached_lookup (ino64_t cache_id, struct node **npp,
			      struct rpc_context *context);

/* Create a new node.  Give it MODE; if that includes IFDIR, also
   initialize `.' and `..' in the new directory.  Return the node in
   *NEWNODE.  CRED identifies the user responsible for the call.  If
   NAME is nonzero, then link the new node into DIR with name NAME; DS
   is the result of a prior diskfs_lookup for creation (and DIR has
   been held locked since).  DIR must always be provided as at least a
   hint for disk allocation strategies.  CONTEXT is the rpc_context
   for this operation (presumably created by diskfs_rpc_context_new).  */
error_t
diskfs_create_node (struct node *dir, const char *name, mode_t mode,
		    struct node **newnode, struct protid *cred,
		    struct dirstat *ds, struct rpc_context *context);

/* Create and return in *CRED a protid for an existing peropen PO,
   referring to user USER.  The node PO->np must be locked.  CONTEXT
   is the rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
error_t diskfs_create_protid (struct peropen *po, struct iouser *user,
			      struct protid **cred,
			      struct rpc_context *context);

/* Build and return in *CRED a protid which has no user identification, for
   peropen PO.  The node PO->np must be locked.  CONTEXT is the
   rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
error_t diskfs_start_protid (struct peropen *po, struct protid **cred,
			     struct rpc_context *context);

/* Finish building protid CRED started with diskfs_start_protid;
   the user to install is USER.  CONTEXT is the rpc_context for this
   operation (presumably created by diskfs_rpc_context_new).  */
void diskfs_finish_protid (struct protid *cred, struct iouser *user,
			   struct rpc_context *context);

/* MiG calls this to turn an incoming PORT into its struct protid *;
   fsmutations.h sets up this translation for the io and fs
   interfaces.  The result is whatever ports_lookup_port finds for
   PORT in diskfs_port_bucket within diskfs_protid_class.  */
DISKFS_EXTERN_INLINE struct protid *
diskfs_begin_using_protid_port (file_t port)
{
  struct protid *cred;

  cred = ports_lookup_port (diskfs_port_bucket, port, diskfs_protid_class);
  return cred;
}

/* MiG calls this once the server routine has run, to balance
   diskfs_begin_using_protid_port; fsmutations.h arranges it for the
   io and fs interfaces.  A null CRED is ignored; otherwise CRED is
   handed to ports_port_deref.  */
DISKFS_EXTERN_INLINE void
diskfs_end_using_protid_port (struct protid *cred)
{
  if (!cred)
    return;

  ports_port_deref (cred);
}

/* Called when a protid CRED has no more references.  (Because references
   to protids are maintained by the port management library, this is
   installed in the clean routines list.)  The ports library will
   free the structure for us.  ARG is presumably the protid (CRED)
   being released, passed as void * -- confirm against the
   implementation.  */
void diskfs_protid_rele (void *arg);

/* Create a new peropen structure on node NP with open flags FLAGS in
   *PPO.  The initial values for the root_parent, shadow_root, and
   shadow_root_parent fields are copied from CONTEXT if it's non-zero,
   otherwise they are zeroed.  Note that CONTEXT here is a peropen;
   RPC_CONTEXT is the rpc_context for this operation (presumably
   created by diskfs_rpc_context_new).  */
error_t
diskfs_make_peropen (struct node *np, int flags,
		     struct peropen *context, struct peropen **ppo,
		     struct rpc_context *rpc_context);

/* Decrement the reference count on peropen structure PO.  RPC_CONTEXT
   is the rpc_context for this operation (presumably created by
   diskfs_rpc_context_new).  */
void diskfs_release_peropen (struct peropen *po,
			     struct rpc_context *rpc_context);

/* Node NP has just been found in DIR with NAME.  If NP is null, that
   means that this name has been confirmed as absent in the directory.
   RPC_CONTEXT is the rpc_context for this operation (presumably
   created by diskfs_rpc_context_new).  */
void diskfs_enter_lookup_cache (struct node *dir, struct node *np,
				const char *name,
				struct rpc_context *rpc_context);

/* Purge all references in the cache to NP as a node inside
   directory DP.  RPC_CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
void diskfs_purge_lookup_cache (struct node *dp, struct node *np,
				struct rpc_context *rpc_context);

/* Scan the cache looking for NAME inside DIR.  If we know nothing
   about the entry at all, then return 0.  If the entry is confirmed to
   not exist, then return -1.  Otherwise, return NP for the entry, with
   a newly allocated reference.  RPC_CONTEXT is the rpc_context for
   this operation (presumably created by diskfs_rpc_context_new).  */
struct node *diskfs_check_lookup_cache (struct node *dir, const char *name,
					struct rpc_context *rpc_context);

/* Rename directory node FNP (whose parent is FDP, and which has name
   FROMNAME in that directory) to have name TONAME inside directory
   TDP.  None of these nodes are locked, and none should be locked
   upon return.  This routine is serialized, so it doesn't have to be
   reentrant.  Directories will never be renamed except by this
   routine.  FROMCRED and TOCRED are the users responsible for
   FDP/FNP and TDP respectively.  This routine assumes the usual
   convention where `.' and `..' are represented by ordinary links;
   if that is not true for your format, you have to redefine this
   function.  CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
error_t
diskfs_rename_dir (struct node *fdp, struct node *fnp, const char *fromname,
		   struct node *tdp, const char *toname,
		   struct protid *fromcred, struct protid *tocred,
		   struct rpc_context *context);

/* Clear the `.' and `..' entries from directory DP.  Its parent is
   PDP, and the user responsible for this is identified by CRED.  Both
   directories must be locked.  This routine assumes the usual
   convention where `.' and `..' are represented by ordinary links; if
   that is not true for your format, you have to redefine this
   function.  CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
error_t diskfs_clear_directory (struct node *dp, struct node *pdp,
				struct protid *cred,
				struct rpc_context *context);

/* Locked node DP is a new directory; add whatever links are necessary
   to give it structure; its parent is the (locked) node PDP.
   This routine may not call diskfs_lookup on PDP.  The new directory
   must be clear within the meaning of diskfs_dirempty.  This routine
   assumes the usual convention where `.' and `..' are represented by
   ordinary links; if that is not true for your format, you have to
   redefine this function.  CRED identifies the user making the call.
   CONTEXT is the rpc_context for this operation (presumably created
   by diskfs_rpc_context_new).  */
error_t
diskfs_init_dir (struct node *dp, struct node *pdp, struct protid *cred,
		 struct rpc_context *context);

/* If NP->dn_set_ctime is set, then modify NP->dn_stat.st_ctime
   appropriately; do the analogous operation for atime and mtime as well. */
void diskfs_set_node_times (struct node *np);

/* Shut down the filesystem; FLAGS are as for fsys_goaway. */
error_t diskfs_shutdown (int flags);

/* Change an active filesystem between read-only and writable modes, setting
   the global variable DISKFS_READONLY to reflect the current mode.  If an
   error is returned, nothing will have changed.  DISKFS_FSYS_LOCK should be
   held while calling this routine.  CONTEXT is the rpc_context for this
   operation (presumably created by diskfs_rpc_context_new).  */
error_t diskfs_set_readonly (int readonly, struct rpc_context *context);

/* Re-read all incore data structures from disk.  This will only work if
   DISKFS_READONLY is true.  DISKFS_FSYS_LOCK should be held while calling
   this routine.  RPC_CONTEXT is the rpc_context for this operation
   (presumably created by diskfs_rpc_context_new).  */
error_t diskfs_remount (struct rpc_context *rpc_context);

/* Called by S_fsys_startup for execserver bootstrap.  The execserver
   is able to function without a real node, hence this fraud.  Arguments
   are all as for fsys_startup in <hurd/fsys.defs>.  */
error_t diskfs_execboot_fsys_startup (mach_port_t port, int flags,
				      mach_port_t ctl, mach_port_t *real,
				      mach_msg_type_name_t *realpoly);

/* Establish a thread to sync the filesystem every INTERVAL seconds, or
   never, if INTERVAL is zero.  If an error occurs creating the thread, it is
   returned, otherwise 0.  Subsequent calls will create a new thread and
   (eventually) get rid of the old one; the old thread won't do any more
   syncs, regardless.  */
error_t diskfs_set_sync_interval (int interval);

/* Parse and execute the runtime options in ARGZ & ARGZ_LEN.  EINVAL is
   returned if some option is unrecognized.  The default definition of this
   routine will parse them using DISKFS_RUNTIME_ARGP, which see.  */
error_t diskfs_set_options (const char *argz, size_t argz_len);

/* Append to the malloced string *ARGZ of length *ARGZ_LEN a NUL-separated
   list of the arguments to this translator.  The default definition of this
   routine simply calls diskfs_append_std_options.  */
error_t diskfs_append_args (char **argz, size_t *argz_len);

/* If this is defined or set to an argp structure, it will be used by the
   default diskfs_set_options to handle runtime option parsing.  The default
   definition is initialized to a pointer to DISKFS_STD_RUNTIME_ARGP.  */
extern struct argp *diskfs_runtime_argp;

/* An argp for the standard diskfs runtime options.  The default definition
   of DISKFS_RUNTIME_ARGP points to this, although a user who redefines
   that may chain this onto their own argp as well.  */
extern const struct argp diskfs_std_runtime_argp;

/* An argp structure for the standard diskfs command line arguments.  The
   user may call argp_parse on this to parse the command line, chain it onto
   the end of their own argp structure, or ignore it completely.  */
extern const struct argp diskfs_startup_argp;

/* An argp structure for the standard diskfs command line arguments plus a
   store specification.  The address of a location in which to return the
   resulting struct store_parsed structure should be passed as the input
   argument to argp_parse; see the declaration for STORE_ARGP in
   <hurd/store.h> for more information.  */
extern const struct argp diskfs_store_startup_argp;

/* *Appends* to ARGZ & ARGZ_LEN '\0'-separated options describing the standard
   diskfs option state (note that unlike diskfs_get_options, ARGZ & ARGZ_LEN
   must already have a sane value).  */
error_t diskfs_append_std_options (char **argz, size_t *argz_len);

/* Demultiplex incoming messages on ports created by libdiskfs.  */
int diskfs_demuxer (mach_msg_header_t *, mach_msg_header_t *);

/* Check if the filesystem is readonly before an operation that
   writes it.  Return 1 if readonly, zero otherwise. */
int diskfs_check_readonly (void);

/* The diskfs library provides functions to demultiplex the fs, io,
   fsys, interrupt, and notify interfaces.  All the server routines
   have the prefix `diskfs_S_'; `in' arguments of type file_t or io_t
   appear as `struct protid *' to the stub.  */


/* All-in-one initialization function for diskfs filesystems using
   libstore.  This parses arguments using STARTUP_ARGP (defaulting to
   diskfs_store_startup_argp if it's null; note that the ARGP_IN_ORDER
   flag is always used); it calls diskfs_init_diskfs; it opens the
   store with store_parsed_open, and sets diskfs_hard_readonly and
   diskfs_readonly if the store is unwritable; it calls
   diskfs_spawn_first_thread; finally, it returns the store and its
   description in *STORE and *STORE_PARSED, and the bootstrap port in
   *BOOTSTRAP.  The caller should pass *BOOTSTRAP to
   diskfs_startup_diskfs after setting diskfs_root_node.
   (See <argp.h> and <hurd/store.h>.)

   This call cannot return failure; if it encounters a fatal problem,
   it prints a diagnostic on stderr (or the console) and exits the
   program.  */
struct store *diskfs_init_main (struct argp *startup_argp,
				int argc, char **argv,
				struct store_parsed **store_parsed,
				mach_port_t *bootstrap);

/* The following are optional convenience routines and global variable, which
   can be used by any user program that uses a mach device to hold the
   underlying filesystem.  */

/* Make errors go somewhere reasonable.  */
void diskfs_console_stdio ();

/* libe3diskfs extensions.  Used for marking the start and end of one
   transaction update.  */

/* User may define this function.  Set USER_DATA for new protid,
   initiated by USER.  */
error_t diskfs_protid_begin (void **user_data, struct iouser *user);

/* User may define this function.  CRED is destroyed.  Do whatever
   necessary.  */
error_t diskfs_protid_end (struct protid *cred, void *user_data);

/* Return USER_DATA for given CRED.  */
void *diskfs_protid_get (struct protid *cred);


/* New e3diskfs extensions.  */

/* User must define this function.  Return new context for an
   operation.  */
error_t diskfs_rpc_context_new (struct rpc_context **context,
				struct protid *cred);

/* User must define this function.  Delete context.  */
error_t diskfs_rpc_context_delete (struct rpc_context *context);

#endif	/* hurd/diskfs.h */
/* Map the disk image and handle faults accessing it.
   Copyright (C) 1996, 1997 Free Software Foundation, Inc.
   Written by Roland McGrath.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2, or (at
   your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */

#ifndef _HURD_DISKFS_PAGER_H
#define _HURD_DISKFS_PAGER_H 1

#include <e3pager.h>
#include <hurd/ports.h>
#include <setjmp.h>
#include <cthreads.h>
#include <errno.h>
#include <assert.h>
#include <stdlib.h>

/* Start a pager for the whole disk, and store it in DISKFS_DISK_PAGER,
   preparing a signal preemptor so that the `diskfs_catch_exception' macro
   below works.  SIZE should be the size of the image to map, and the address
   mapped is returned in IMAGE.  INFO, PAGER_BUCKET, & MAY_CACHE are passed
   to `pager_create'.  */
extern void diskfs_start_disk_pager (struct user_pager_info *info,
				     struct port_bucket *pager_bucket, int may_cache,
				     size_t size, void **image);

extern struct pager *diskfs_disk_pager;

/* One entry in a thread's chain of active disk-image fault handlers.
   diskfs_catch_exception allocates an entry with alloca and stores it as
   the thread's cthread data; diskfs_end_catch_exception restores the
   previous entry.  */
struct disk_image_user
  {
    /* Context saved by setjmp in diskfs_catch_exception.  */
    jmp_buf env;
    /* The cthread data value that was current when this entry was
       installed (the enclosing entry, if any).  */
    struct disk_image_user *next;
  };

/* Return zero now.  Return a second time with a nonzero error_t
   if this thread faults accessing `disk_image' before calling
   `diskfs_end_catch_exception' (below).

   The macro is a GNU statement expression: it allocates a
   `struct disk_image_user' with alloca in the caller's frame, remembers
   the thread's current cthread data in its `next' field, and calls setjmp
   on its `env'.  Only on the direct (zero) return from setjmp is the new
   record installed as the thread's cthread data.  The value of the whole
   expression is the setjmp result.  */
#define diskfs_catch_exception()					      \
({									      \
    struct disk_image_user *diu = alloca (sizeof *diu);			      \
    error_t err;							      \
    diu->next = (void *) cthread_data (cthread_self ());		      \
    err = setjmp (diu->env);						      \
    if (err == 0)							      \
      cthread_set_data (cthread_self (), diu);				      \
    err;								      \
})

/* No longer handle faults on `disk_image' in this thread.
   Any unexpected fault hereafter will crash the program.

   Reads the thread's current cthread data as a `struct disk_image_user'
   and replaces it with that record's `next' field, i.e. the value that
   was current before the matching diskfs_catch_exception.  */
#define diskfs_end_catch_exception()					      \
({									      \
    struct disk_image_user *diu = (void *) cthread_data (cthread_self ());    \
    cthread_set_data (cthread_self (), diu->next);			      \
})


#endif	/* hurd/diskfs-pager.h */
# The only program built in this directory: the ext3fs binary.
bin_PROGRAMS = ext3fs
# C sources plus the private headers that must be distributed with them.
ext3fs_SOURCES = balloc.c bitmap.c dir.c ext3fs.c getblk.c hyper.c ialloc.c \
	inode.c jstore.c orphan.c pager.c truncate.c storeinfo.c msg.c \
	ext3fs_ei.c ext3fs.h ext3_fs.h ext3_fs_i.h
# Preprocessor flags: the project-wide AM_CPPFLAGS, the EXT3FS_DEBUG switch
# tested by ext3fs.h, and include paths for the four in-tree libraries.
ext3fs_CPPFLAGS = $(AM_CPPFLAGS) -DEXT3FS_DEBUG=1 \
	-I$(top_srcdir)/libe3pager -I$(top_srcdir)/libscache \
	-I$(top_srcdir)/libjstore  -I$(top_srcdir)/libe3diskfs

# Link statically.
ext3fs_LDFLAGS = -static
# In-tree libtool libraries first, then the libraries expected from the
# installed system.
ext3fs_LDADD =  ../libjstore/libjstore.la \
		../libscache/libscache.la \
		../libe3diskfs/libe3diskfs.la \
		../libe3pager/libe3pager.la \
		-liohelp -lfshelp -lstore \
		-lthreads -lports -lihash -lshouldbeinlibc
/* Common definitions for the ext3 filesystem translator

   Copyright (C) 2003, 2004 Ognyan Kulev
   Copyright (C) 1995,1996,1999,2002,2003 Free Software Foundation, Inc.

   Written by Miles Bader <miles@gnu.ai.mit.edu>

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2, or (at
   your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */

#include <stdint.h>
#include <mach.h>
#include <hurd.h>
#include <hurd/ports.h>
#include <e3pager.h>
#include <hurd/fshelp.h>
#include <hurd/iohelp.h>
#include <hurd/store.h>
#include <e3diskfs.h>
#include <hurd/ihash.h>
#include <assert.h>
#include <rwlock.h>
#include <sys/mman.h>
#include <scache.h>
#include <jstore.h>

#define __hurd__		/* Enable some hurd-specific fields.  */

/* Types used by the ext3 header files.  */
typedef uint32_t  __u32;
typedef __u32	  u32;
typedef int32_t   __s32;
typedef uint16_t  __u16;
typedef int16_t   __s16;
typedef uint8_t   __u8;
typedef int8_t    __s8;

#include "ext3_fs.h"
#include "ext3_fs_i.h"

#define i_mode_high	osd2.hurd2.h_i_mode_high /* missing from ext3_fs.h */

/* Storage-class prefix for the inline functions defined in this header.
   A source file may define EXT3FS_EI before including the header to
   override it.

   The functions rely on the traditional GNU meaning of `extern inline'
   (inline copy only, no out-of-line definition emitted here).  When the
   compiler is using ISO C99 inline semantics it defines
   __GNUC_STDC_INLINE__; in that mode a plain `extern inline' would emit an
   external definition in every file including this header, so ask for the
   GNU semantics explicitly.  */
#ifndef EXT3FS_EI
# ifdef __GNUC_STDC_INLINE__
#  define EXT3FS_EI extern inline __attribute__ ((__gnu_inline__))
# else
#  define EXT3FS_EI extern inline
# endif
#endif

/* If ext3_fs.h defined a debug routine, undef it and use our own.  */
#undef ext3_debug

#ifdef EXT3FS_DEBUG
extern int ext3_debug_flag;
#define ext3_debug(f, a...) \
 do { if (ext3_debug_flag) printf ("ext3fs: (debug) %s: " f "\n", __FUNCTION__ , ## a); } while (0)
#else
#define ext3_debug(f, a...)	(void)0
#endif

#undef __hurd__

/* Define this if memory objects should not be cached by the kernel.
   Normally, don't define it, but defining it causes a much greater rate
   of paging requests, which may be helpful in catching bugs. */

#undef DONT_CACHE_MEMORY_OBJECTS

int printf (const char *fmt, ...);

/* A block number.  */
typedef __u32 block_t;

/* Store-to-host */
#define STOH16(x)	(x)
#define STOH32(x)	(x)

/* Host-to-store */
#define HTOS16(x)	(x)
#define HTOS32(x)	(x)

/* Addition and subtraction of store values: convert X to host order,
   add or subtract V, convert back and assign the result to X.  X must be
   an lvalue and is evaluated twice, so it must not have side effects.
   Each expansion is fully parenthesised so it can be used safely inside a
   larger expression.  */
#define SADD16(x,v)	((x) = HTOS16(STOH16(x) + (v)))
#define SADD32(x,v)	((x) = HTOS32(STOH32(x) + (v)))

#define SSUB16(x,v)	((x) = HTOS16(STOH16(x) - (v)))
#define SSUB32(x,v)	((x) = HTOS32(STOH32(x) - (v)))

/* ---------------------------------------------------------------- */

#define ext3_error(fmt, args...) _ext3_error (__FUNCTION__, fmt , ##args)
extern void _ext3_error (const char *, const char *, ...)
     __attribute__ ((format (printf, 2, 3)));

#define ext3_panic(fmt, args...) _ext3_panic (__FUNCTION__, fmt , ##args)
extern void _ext3_panic (const char *, const char *, ...)
     __attribute__ ((format (printf, 2, 3)));

extern void ext3_warning (const char *, ...)
     __attribute__ ((format (printf, 1, 2)));

/* ---------------------------------------------------------------- */
/* Bitmap routines.  */

#include <stdint.h>

/* Returns TRUE (1) if bit NUM is set in BITMAP, otherwise 0.  BITMAP is
   read as an array of 32-bit words: word NUM / 32, bit NUM % 32.  */
EXT3FS_EI int
test_bit (unsigned num, char *bitmap)
{
  const uint32_t *const bw = (uint32_t *) bitmap + (num >> 5);
  /* Shift an unsigned 32-bit one: `1 << 31' on a plain int shifts into
     the sign bit, which is undefined behaviour.  */
  const uint32_t mask = UINT32_C (1) << (num & 31);
  return (*bw & mask) != 0;
}

/* Sets bit NUM in BITMAP, and returns the previous state of the bit
   (1 if it was already set, 0 if not).  Unlike the linux version, this
   function is NOT atomic!  */
EXT3FS_EI int
set_bit (unsigned num, char *bitmap)
{
  uint32_t *const bw = (uint32_t *) bitmap + (num >> 5);
  /* Shift an unsigned 32-bit one: `1 << 31' on a plain int shifts into
     the sign bit, which is undefined behaviour.  */
  const uint32_t mask = UINT32_C (1) << (num & 31);

  /* Leave the word untouched when the bit is already set.  */
  if (*bw & mask)
    return 1;

  *bw |= mask;
  return 0;
}

/* Clears bit NUM in BITMAP, and returns the previous state of the bit
   (1 if it was set, 0 if not).  Unlike the linux version, this function
   is NOT atomic!  */
EXT3FS_EI int
clear_bit (unsigned num, char *bitmap)
{
  uint32_t *const bw = (uint32_t *) bitmap + (num >> 5);
  /* Shift an unsigned 32-bit one: `1 << 31' on a plain int shifts into
     the sign bit, which is undefined behaviour.  */
  const uint32_t mask = UINT32_C (1) << (num & 31);

  /* Leave the word untouched when the bit is already clear.  */
  if ((*bw & mask) == 0)
    return 0;

  *bw &= ~mask;
  return 1;
}

/* ---------------------------------------------------------------- */

/* XXX: Yeah, using random constant for credits is lame.  */
#define JSTORE_UPDATE_CREDITS		20

/* Our operation context is single update in the journalled store.  */
struct ext3fs_rpc_context
{
  /* The generic libe3diskfs context.  NOTE(review): presumably kept as the
     first member so the struct can be handed out as a `struct rpc_context *'
     by diskfs_rpc_context_new -- confirm in jstore.c.  */
  struct rpc_context e3diskfs;
  /* Maps a pointer returned by fragment_get_func to its `struct fragment'
     bookkeeping record; consulted again by fragment_put.  */
  hurd_ihash_t fragment_ihash;
  /* The journal update in progress.  When this is null, fragment_get_func
     and fragment_put use the plain scache instead of the journal.  */
  struct jstore_update *update;
};

/* ext3fs specific per-file data.  */
struct disknode
{
  /* For a directory, this array holds the number of directory entries in
     each DIRBLKSIZE piece of the directory. */
  int *dirents;

  /* Links on hash list.  */
  struct node *hnext, **hprevp;

  /* Links on orphan list.  For read/write, global_lock must be held.  */
  struct node *onext, *oprev;

  /* Lock to lock while fiddling with this inode's block allocation info.  */
  struct rwlock alloc_lock;

#if 0
  /* Where changes to our indirect blocks are added.  */
  struct scache_bufset *indir_pokel;
#endif

  /* Random extra info used by the ext3 routines.  */
  struct ext3_inode_info info;
  uint32_t info_i_translator;	/* That struct from Linux source lacks this. */

  /* This file's pager.  */
  struct pager *pager;

  /* True if the last page of the file has been made writable, but is only
     partially allocated.  */
  int last_page_partially_writable;

  /* Index to start a directory lookup at.  */
  int dir_idx;
};

/* Per-pager data; a pointer to it is what diskfs_start_disk_pager takes as
   its INFO argument.  */
struct user_pager_info
{
  /* NOTE(review): presumably set to EXT3FS_UPI_MAGIC as a sanity tag --
     confirm in pager.c.  */
  uint32_t magic;
#define EXT3FS_UPI_MAGIC		0x43215678
  /* NOTE(review): presumably the node whose contents this pager serves --
     confirm in pager.c.  */
  struct node *node;
  /* NOTE(review): presumably the maximum protection granted on the memory
     object -- confirm in pager.c.  */
  vm_prot_t max_prot;
};

/* ---------------------------------------------------------------- */
/* pager.c */

#define STORE_CACHE_BLOCKS	300

#include "diskfs-pager.h"

/* Set up the disk pager.  */
void create_scache (void);

/* Call this when we should turn off caching so that unused memory object
   ports get freed.  */
void drop_pager_softrefs (struct node *node);

/* Call this when we should turn on caching because it's no longer
   important for unused memory object ports to get freed.  */
void allow_pager_softrefs (struct node *node);

/* Invalidate any pager data associated with NODE.  */
void flush_node_pager (struct node *node);

/* ---------------------------------------------------------------- */

/* The physical media.  */
extern struct store *store;
/* What the user specified.  */
extern struct store_parsed *store_parsed;

/* The scache used for accessing metadata.  */
extern struct scache *scache;

/* The journal of the filesystem.  */
extern struct jstore *jstore;

#if 0
/* Our in-core copy of the super-block (pointer into the scache).  */
struct ext3_super_block *sblock;
/* True if sblock has been modified.  */
int sblock_dirty;
#endif

/* Where the super-block is located on disk (at min-block 1).  */
#define SBLOCK_BLOCK	1	/* Default location, second 1k block.  */
#define SBLOCK_SIZE	(sizeof (struct ext3_super_block))
extern unsigned int sblock_block; /* Specified location (in 1k blocks).  */
#define SBLOCK_OFFS	(sblock_block << 10) /* Byte offset of superblock.  */

/* fs clean before we started writing? */
extern int ext3fs_clean;

/* The filesystem block-size.  */
extern unsigned int block_size;
/* The log base 2 of BLOCK_SIZE.  */
extern unsigned int log2_block_size;

/* The number of bits to scale min-blocks to get filesystem blocks.  */
#define BLOCKSIZE_SCALE	(sblock->s_log_block_size)

/* log2 of the number of device blocks in a filesystem block.  */
extern unsigned log2_dev_blocks_per_fs_block;

/* log2 of the number of stat blocks (512 bytes) in a filesystem block.  */
extern unsigned log2_stat_blocks_per_fs_block;

/* A handy page of page-aligned zeros.  */
extern vm_address_t zeroblock;

/* First block in group descriptors blocks.  */
extern block_t group_desc_first_block;

/* Get the superblock from the disk, & setup various global info from it.  */
void get_hypermetadata ();

/* ---------------------------------------------------------------- */
/* Random stuff calculated from the super block.  */

/* NOTE(review): unlike the `extern' declarations earlier in this header,
   the variables below carry no storage-class specifier, so each source file
   including the header gets its own tentative definition.  That only links
   if the toolchain merges such definitions (e.g. GCC's -fcommon); confirm
   the build flags, or declare them `extern' here and define them once in a
   .c file.  */
unsigned long frag_size;	/* Size of a fragment in bytes */
unsigned long frags_per_block;	/* Number of fragments per block */
unsigned long inodes_per_block;	/* Number of inodes per block */

unsigned long itb_per_group;	/* Number of inode table blocks per group */
unsigned long db_per_group;	/* Number of descriptor blocks per group */
unsigned long desc_per_block;	/* Number of group descriptors per block */
unsigned long addr_per_block;	/* Number of disk addresses per block */

unsigned long groups_count;	/* Number of groups in the fs */

/* NOTE(review): the names below suggest cached copies of the like-named
   superblock fields; confirm where they are filled in (get_hypermetadata is
   the declared setup routine).  */
ino_t journal_inum;		/* Inode number of the journal */
uint32_t feature_compat;	/* Tested by EXT3FS_HAS_COMPAT_FEATURE */
uint32_t feature_incompat;	/* Tested by EXT3FS_HAS_INCOMPAT_FEATURE */
uint32_t feature_ro_compat;	/* Tested by EXT3FS_HAS_RO_COMPAT_FEATURE */
uint32_t creator_os;
block_t blocks_count;
block_t first_data_block;
block_t blocks_per_group;
block_t inodes_per_group;	/* Used by inode_get and inode_group_num */

/* These replace the ones in ext3_fs.h and use the cached values.  */
#define EXT3FS_HAS_COMPAT_FEATURE(mask)		(feature_compat & (mask))
#define EXT3FS_HAS_INCOMPAT_FEATURE(mask)	(feature_incompat & (mask))
#define EXT3FS_HAS_RO_COMPAT_FEATURE(mask)	(feature_ro_compat & (mask))

/* ---------------------------------------------------------------- */

/* NOTE(review): these three variables have no `extern', so every file that
   includes this header tentatively defines them; confirm the build merges
   common symbols.  */

/* NOTE(review): not used in this header; presumably guards the
   node-to-pager association in pager.c -- confirm.  */
spin_lock_t node_to_page_lock;

/* NOTE(review): presumably generation_lock protects next_generation, the
   next inode generation number to hand out -- confirm in the allocator.  */
spin_lock_t generation_lock;
unsigned long next_generation;

/* ---------------------------------------------------------------- */
/* Functions for looking inside scache */

#define trunc_block(offs) (((offs) >> log2_block_size) << log2_block_size)
#define round_block(offs) \
  ((((offs) + block_size - 1) >> log2_block_size) << log2_block_size)

#define inode_group_num(inum) (((inum) - 1) / inodes_per_group)

/* Bookkeeping record for one pointer handed out by fragment_get_func.
   Records are stored in the rpc context's fragment_ihash, keyed by the
   pointer value.  */
struct fragment
{
  /* The buffer containing the pointed-to data; released by fragment_put.  */
  struct scache_buffer *buffer;
  /* Number of fragment_get_func calls for this pointer that have not yet
     been matched by fragment_put.  The record is freed when it reaches
     zero.  */
  int ref_count;
};

/* Return a pointer to element INDEX (counted in units of TYPE_SIZE bytes)
   of the data of block BLOCK, or NULL on failure.

   The block's buffer is obtained through the journal update of
   RPC_CONTEXT when there is one (passing FLAGS to jstore_block_get), and
   through the plain scache otherwise.  Every successful call takes one
   buffer reference and is recorded in RPC_CONTEXT's fragment_ihash, keyed
   by the returned pointer; each must be balanced by one fragment_put on
   that pointer.

   When the bookkeeping record cannot be created, the buffer reference is
   released again before returning NULL, because the caller has no pointer
   with which it could call fragment_put.  errno is then set to the
   error.  */
EXT3FS_EI void *
fragment_get_func(struct ext3fs_rpc_context *rpc_context,
		  scache_block_t block, size_t type_size,
		  int index, int flags)
{
  struct scache_buffer *buffer;
  struct fragment *fragment;
  error_t err;
  void *ptr;

  assert (rpc_context);

  if (rpc_context->update)
    buffer = jstore_block_get (rpc_context->update, block, flags);
  else
    buffer = scache_get (scache, block);
  if (! buffer)
    return NULL;
  ptr = buffer->data + index * type_size;
  fragment = hurd_ihash_find (rpc_context->fragment_ihash,
			      (uintptr_t) ptr);
  if (fragment)
    {
      /* Already handed out in this context: just count the new user.  */
      fragment->ref_count++;
      return ptr;
    }

  fragment = malloc (sizeof *fragment);
  if (! fragment)
    err = ENOMEM;
  else
    {
      fragment->buffer = buffer;
      fragment->ref_count = 1;
      err = hurd_ihash_add (rpc_context->fragment_ihash,
			    (uintptr_t) ptr, fragment);
      if (! err)
	return ptr;
      free (fragment);
    }

  /* Failure: give back the buffer reference taken above.  Nothing was
     modified, so the journalled buffer is put back clean.  */
  if (rpc_context->update)
    jstore_block_put (rpc_context->update, buffer, JSTORE_PUT_CLEAN);
  else
    scache_put (scache, buffer);

  /* Set errno last so the release calls cannot overwrite it.  */
  errno = err;
  return NULL;
}

/* Typed front end to fragment_get_func: yields a `TYPE *' to element INDEX
   of block BLOCK.  The whole expansion is parenthesised so that a postfix
   operator applied to the macro call (such as `->') acts on the cast
   result.  */
#define FRAGMENT_GET_FUNC(rpc_context, block, type, index, flags)	\
	((type *) fragment_get_func ((rpc_context), (block),		\
				     sizeof (type), (index), (flags)))

/* Yield a `TYPE *' to element INDEX of an array of TYPE that starts at
   block FIRST_BLOCK and may span several blocks.  The byte offset
   INDEX * sizeof (TYPE) is split into a block number (offset shifted right
   by log2_block_size) and an element index inside that block.  INDEX is
   evaluated twice.  */
#define FRAGMENT_GET(rpc_context, first_block, type, index, flags)	\
	FRAGMENT_GET_FUNC((rpc_context),				\
		          (first_block)					\
		          + (((index) * sizeof (type)) >> log2_block_size), \
		          type,						\
			  (((index) * sizeof (type)) & (block_size - 1)) / \
			    sizeof (type),				\
			  (flags))

/* Release one reference on PTR, which must have been returned by
   fragment_get_func for the same RPC_CONTEXT.  The underlying buffer is
   put back through the journal update (with FLAGS) when RPC_CONTEXT has
   one, and through the plain scache otherwise.  The bookkeeping record is
   removed and freed when its last reference goes away.  */
EXT3FS_EI void
fragment_put (struct ext3fs_rpc_context *rpc_context,
	      void *ptr, int flags)
{
  const uintptr_t key = (uintptr_t) ptr;
  struct fragment *entry;

  assert (rpc_context);

  entry = hurd_ihash_find (rpc_context->fragment_ihash, key);
  assert (entry);
  assert (entry->ref_count >= 1);

  if (! rpc_context->update)
    scache_put (scache, entry->buffer);
  else
    jstore_block_put (rpc_context->update, entry->buffer, flags);

  /* Other users of this pointer remain: keep the record.  */
  if (--entry->ref_count != 0)
    return;

  hurd_ihash_remove (rpc_context->fragment_ihash, key);
  free (entry);
}

/* Return a pointer to the first block number stored in indirect block
   BLOCK, or NULL on failure.  Release it with indblocks_put.  */
EXT3FS_EI block_t *
indblocks_get (struct ext3fs_rpc_context *rpc_context,
	       block_t block, int flags)
{
    block_t *const entries
      = FRAGMENT_GET (rpc_context, block, block_t, 0, flags);

    return entries;
}

#define indblocks_put	fragment_put

/* Return a pointer to the superblock, or NULL on failure.  The superblock
   is addressed as element 1 of an array of `struct ext3_super_block'
   starting at block 0.  Release it with super_block_put.  */
EXT3FS_EI struct ext3_super_block *
super_block_get (struct ext3fs_rpc_context *rpc_context, int flags)
{
    struct ext3_super_block *const sb
      = FRAGMENT_GET (rpc_context, 0, struct ext3_super_block, 1, flags);

    return sb;
}

#define super_block_put		fragment_put

/* Return a pointer to the descriptor of block group GROUP_NUM, or NULL on
   failure.  Descriptors form an array that starts at block
   group_desc_first_block.  FLAGS is forwarded to the block getter, as in
   indblocks_get and super_block_get; it used to be dropped and replaced
   by 0.  Release the result with group_desc_put.  */
EXT3FS_EI struct ext3_group_desc *
group_desc_get (struct ext3fs_rpc_context *rpc_context,
		int group_num, int flags)
{
  return FRAGMENT_GET (rpc_context, group_desc_first_block,
		       struct ext3_group_desc, group_num, flags);
}

#define group_desc_put		fragment_put

/* Convert an inode number to the dinode on disk.

   Looks up the descriptor of the block group containing INUM, then returns
   a pointer to the inode inside that group's inode table.  FLAGS is
   forwarded to the block getter for the inode itself; the group descriptor
   is only read and is always put back clean.  Returns NULL if either
   lookup fails.  Release the result with inode_put.  */
EXT3FS_EI struct ext3_inode *
inode_get (struct ext3fs_rpc_context *rpc_context, ino_t inum, int flags)
{
  unsigned long bg_num = (inum - 1) / inodes_per_group;
  unsigned long group_inum = (inum - 1) % inodes_per_group;
  struct ext3_group_desc *gdp;
  struct ext3_inode *di;

  /* Inode numbers start at 1, so the last valid one equals the total
     inode count.  */
  assert (1 <= inum && inum <= inodes_per_group * groups_count);
  gdp = group_desc_get (rpc_context, bg_num, 0);
  if (! gdp)
    return NULL;

  di = FRAGMENT_GET (rpc_context, STOH32(gdp->bg_inode_table),
		     struct ext3_inode, group_inum, flags);

  group_desc_put (rpc_context, gdp, JSTORE_PUT_CLEAN);

  /* Cast so the arguments match %qu and %p whatever the width of ino_t.  */
  ext3_debug ("(%qu) = %p", (unsigned long long) inum, (void *) di);

  return di;
}

#define inode_put		fragment_put

/* ---------------------------------------------------------------- */
/* inode.c */

/* Write all active disknodes into the inode pager. */
void write_all_disknodes ();

/* Lookup node INUM (which must have a reference already) and return it
   without allocating any new references. */
struct node *ifind (ino_t inum);

void inode_init (void);

struct ext3_inode *write_node (struct node *np,
			       struct ext3fs_rpc_context *rpc_context);

/* ---------------------------------------------------------------- */

/* What to lock if changing global data (e.g., the superblock or block
   group descriptors or bitmaps).  */
spin_lock_t global_lock;

#if 0

/* Where to record such changes.  */
extern struct scache_bufset *global_pokel;

/* If the block size is less than the page size, then this bitmap is used to
   record which disk blocks are actually modified, so we don't stomp on parts
   of the disk which are backed by file pagers.  */
char *modified_global_blocks;
spin_lock_t modified_global_blocks_lock;

/* Marks the global block BLOCK as being modified, and returns true if we
   think it may have been clean before (but we may not be sure).  Note that
   this isn't enough to cause the block to be synced; you must call
   record_global_poke to do that.  */
EXT3FS_EI int
global_block_modified (block_t block)
{
  if (modified_global_blocks)
    {
      int was_clean;
      spin_lock (&modified_global_blocks_lock);
      was_clean = !set_bit(block, modified_global_blocks);
      spin_unlock (&modified_global_blocks_lock);
      return was_clean;
    }
  else
    return 1;
}

/* This records a modification to a non-file block.  */
EXT3FS_EI void
record_global_poke (struct scache_buffer *buffer)
{
  ext3_debug ("(%qu)", buffer->block);
  global_block_modified (buffer->block);
  scache_put_dirty (scache, buffer, global_pokel);
}

/* record_global_poke for group descriptor.  */
EXT3FS_EI void
record_group_desc_poke (int num)
{
  struct scache_buffer *buffer;
  
  buffer = group_desc_buffer (num);
  ext3_debug ("(%d = %Lu)", num, buffer->block);
  scache_ref (buffer);
  record_global_poke (buffer);
}

/* This syncs a modification to a non-file block.  */
EXT3FS_EI void
sync_global_ptr (struct scache_buffer *buffer, int wait)
{
  static struct scache_bufset *group = NULL;

  if (! group)
    {
      group = scache_bufset_new (scache);
      if (! group)
	{
	  ext3_panic ("Error in creating scache group: %s",
		      strerror (errno));
	}
    }

  ext3_debug ("(%Lu)", buffer->block);
  global_block_modified (buffer->block);
  scache_put (scache, buffer);
  scache_mark_dirty (scache, buffer, group);
  scache_bufset_return (group, wait);
}

/* This records a modification to one of a file's indirect blocks.  */
EXT3FS_EI void
record_indir_poke (struct node *node, struct scache_buffer *buffer)
{
  ext3_debug ("(%d, %Lu)", (int)node->cache_id, buffer->block);
  global_block_modified (buffer->block);
  scache_put_dirty (scache, buffer, global_pokel);
}


/* ---------------------------------------------------------------- */

EXT3FS_EI void
sync_global (int wait)
{
  scache_bufset_return (global_pokel, wait);
}

/* Sync all allocation information and node NP if diskfs_synchronous. */
EXT3FS_EI void
alloc_sync (struct node *np)
{
  if (diskfs_synchronous)
    {
      if (np)
	{
	  diskfs_node_update (np, 1);
	  scache_bufset_return (np->dn->indir_pokel, 1);
	}
      diskfs_set_hypermetadata (1, 0);
    }
}

#else /* !0 -- fake functions.  */

/* Fake replacement for the disabled version above: does nothing.  */
EXT3FS_EI void
record_global_poke (struct scache_buffer *buffer)
{
}

/* Fake replacement for the disabled version above: does nothing.  */
EXT3FS_EI void
record_group_desc_poke (int num)
{
}

/* Fake replacement for the disabled version above: does nothing.  */
EXT3FS_EI void
sync_global_ptr (struct scache_buffer *buffer, int wait)
{
}

/* Fake replacement for the disabled version above: does nothing.  */
EXT3FS_EI void
record_indir_poke (struct node *node, struct scache_buffer *buffer)
{
}

/* Commit the journal.  If RPC_CONTEXT is non-null and has an update in
   progress, that update is ended first and a fresh one with
   JSTORE_UPDATE_CREDITS credits is begun afterwards, so the caller can
   keep using RPC_CONTEXT.  WAIT is not used by this version.  Failures are
   not reported to the caller; they are caught by assertions only.  */
EXT3FS_EI void
sync_global (struct ext3fs_rpc_context *rpc_context, int wait)
{
  error_t err;

  /* Close the caller's open update before committing.  */
  if (rpc_context && rpc_context->update)
    {
      err = jstore_update_end (rpc_context->update);
      assert_perror (err);	/* XXX: no error path, abort on failure */
    }

  err = jstore_commit (jstore);
  assert_perror (err);		/* XXX: no error path, abort on failure */

  /* Re-open an update for the caller.  */
  if (rpc_context && rpc_context->update)
    {
      rpc_context->update = jstore_update_begin (jstore,
						 JSTORE_UPDATE_CREDITS);
      assert (rpc_context->update);
    }
}

/* Fake replacement for the disabled version above: does nothing.  */
EXT3FS_EI void
alloc_sync (struct node *np)
{
}

#endif /* 0 */


/* ---------------------------------------------------------------- */
/* getblk.c */

void ext3_discard_prealloc (struct node *node,
			    struct ext3fs_rpc_context *rpc_context);

/* Returns in DISK_BLOCK the disk block corresponding to BLOCK in NODE.  If
   there is no such block yet, but CREATE is true, then it is created,
   otherwise EINVAL is returned.  */
error_t ext3_getblk (struct node *node, block_t block, int create,
		     block_t *disk_block,
		     struct ext3fs_rpc_context *rpc_context);

block_t ext3_new_block (block_t goal,
			block_t prealloc_goal,
			block_t *prealloc_count, block_t *prealloc_block,
			struct ext3fs_rpc_context *rpc_context);

void ext3_free_blocks (block_t block, unsigned long count,
		       struct ext3fs_rpc_context *rpc_context);

/* ---------------------------------------------------------------- */
/* storeinfo.c */

error_t node_store (struct node *node, struct store **pfile_store,
		    struct ext3fs_rpc_context *rpc_context);

/* ---------------------------------------------------------------- */
/* jstore.c */

error_t ext3_jstore_open (void);

error_t diskfs_rpc_context_new (struct rpc_context **context,
				struct protid *cred);

error_t diskfs_rpc_context_delete (struct rpc_context *context);

/* ---------------------------------------------------------------- */
/* orphan.c */

/**
 * List of orphaned inodes.
 *
 * These are 1) either removed from filesystem namespace, but still
 * opened by process, or 2) being truncated, but truncation is still
 * not finished.
 */
extern struct node *orphan_list;

error_t orphan_cleanup (struct rpc_context *context);

/**
 * Add NP as orphaned inode.
 *
 * Pre: NP must be locked.
 *
 * Post: diskfs_node_update must be called by caller.
 *
 * @param np Node.
 * @param rpc_context RPC context.
 * @return Zero on success, EEXIST when node is already orphaned,
 * other error code on error.
 */
error_t orphan_add (struct node *np,
		    struct ext3fs_rpc_context *rpc_context);

/**
 * Remove NP from orphaned inode list.
 *
 * Pre: NP must be locked.
 *
 * @param np Node.
 * @param rpc_context RPC context.
 * @return Zero on success, ENOENT when NP is not orphaned, other
 * error code on error.
 */
error_t orphan_remove (struct node *np,
		       struct ext3fs_rpc_context *rpc_context);
/*
 *  linux/include/linux/ext3_fs.h
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/include/linux/minix_fs.h
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#ifndef _LINUX_EXT3_FS_H
#define _LINUX_EXT3_FS_H

struct statfs;

/*
 * The second extended filesystem constants/structures
 */

/*
 * Define EXT3FS_DEBUG to produce debug messages
 */
#define EXT3FS_DEBUG			1

/*
 * Define EXT3_PREALLOCATE to preallocate data blocks for expanding files
 */
#define EXT3_PREALLOCATE		1
#define EXT3_DEFAULT_PREALLOC_BLOCKS	8

/*
 * Always enable hashed directories
 */
#define CONFIG_EXT3_INDEX

/*
 * Debug code
 */
#ifdef EXT3FS_DEBUG
#define ext3_debug(f, a...)						\
	do {								\
		printk (KERN_DEBUG "EXT3-fs DEBUG (%s, %d): %s:",	\
			__FILE__, __LINE__, __FUNCTION__);		\
		printk (KERN_DEBUG f, ## a);				\
	} while (0)
#else
#define ext3_debug(f, a...)	do {} while (0)
#endif

/*
 * Special inodes numbers
 */
#define	EXT3_BAD_INO		 1	/* Bad blocks inode */
#define EXT3_ROOT_INO		 2	/* Root inode */
#define EXT3_BOOT_LOADER_INO	 5	/* Boot loader inode */
#define EXT3_UNDEL_DIR_INO	 6	/* Undelete directory inode */
#define EXT3_RESIZE_INO		 7	/* Reserved group descriptors inode */
#define EXT3_JOURNAL_INO	 8	/* Journal inode */

/* First non-reserved inode for old ext3 filesystems */
#define EXT3_GOOD_OLD_FIRST_INO	11

/*
 * The second extended file system magic number
 */
#define EXT3_SUPER_MAGIC	0xEF53

/*
 * Maximal count of links to a file
 */
#define EXT3_LINK_MAX		32000

/*
 * Macro-instructions used to manage several block sizes
 */
#define EXT3_MIN_BLOCK_SIZE		1024
#define	EXT3_MAX_BLOCK_SIZE		4096
#define EXT3_MIN_BLOCK_LOG_SIZE		  10
#ifdef __KERNEL__
# define EXT3_BLOCK_SIZE(s)		((s)->s_blocksize)
#else
# define EXT3_BLOCK_SIZE(s)		(EXT3_MIN_BLOCK_SIZE << STOH32((s)->s_log_block_size))
#endif
#define	EXT3_ADDR_PER_BLOCK(s)		(EXT3_BLOCK_SIZE(s) / sizeof (__u32))
#ifdef __KERNEL__
# define EXT3_BLOCK_SIZE_BITS(s)	STOH32(((s)->s_blocksize_bits))
#else
# define EXT3_BLOCK_SIZE_BITS(s)	(STOH32((s)->s_log_block_size) + 10)
#endif
#ifdef __KERNEL__
#define	EXT3_ADDR_PER_BLOCK_BITS(s)	(EXT3_SB(s)->s_addr_per_block_bits)
#define EXT3_INODE_SIZE(s)		(EXT3_SB(s)->s_inode_size)
#define EXT3_FIRST_INO(s)		(EXT3_SB(s)->s_first_ino)
#else
/* Inode size and first non-reserved inode number for superblock S.
   Filesystems at revision EXT3_GOOD_OLD_REV use the fixed "good old"
   values; later revisions take them from the superblock.  The
   store-to-host conversion is applied to the s_rev_level field itself,
   before the comparison, not to the result of the comparison.  */
#define EXT3_INODE_SIZE(s)	((STOH32((s)->s_rev_level) == EXT3_GOOD_OLD_REV) ? \
				 EXT3_GOOD_OLD_INODE_SIZE : \
				 STOH32((s)->s_inode_size))
#define EXT3_FIRST_INO(s)	((STOH32((s)->s_rev_level) == EXT3_GOOD_OLD_REV) ? \
				 EXT3_GOOD_OLD_FIRST_INO : \
				 STOH32((s)->s_first_ino))
#endif

/*
 * Macro-instructions used to manage fragments
 */
#define EXT3_MIN_FRAG_SIZE		1024
#define	EXT3_MAX_FRAG_SIZE		4096
#define EXT3_MIN_FRAG_LOG_SIZE		  10
#if 0
#ifdef __KERNEL__
# define EXT3_FRAG_SIZE(s)		(EXT3_SB(s)->s_frag_size)
# define EXT3_FRAGS_PER_BLOCK(s)	(EXT3_SB(s)->s_frags_per_block)
#else
# define EXT3_FRAG_SIZE(s)		(EXT3_MIN_FRAG_SIZE << (s)->s_log_frag_size)
# define EXT3_FRAGS_PER_BLOCK(s)	(EXT3_BLOCK_SIZE(s) / EXT3_FRAG_SIZE(s))
#endif
#endif /* 0 */

/*
 * Structure of a blocks group descriptor
 *
 * The three __u32 fields, four __u16 fields and three reserved __u32
 * words add up to 32 bytes.
 */
struct ext3_group_desc
{
	__u32	bg_block_bitmap;		/* Blocks bitmap block */
	__u32	bg_inode_bitmap;		/* Inodes bitmap block */
	__u32	bg_inode_table;		/* Inodes table block */
	__u16	bg_free_blocks_count;	/* Free blocks count */
	__u16	bg_free_inodes_count;	/* Free inodes count */
	__u16	bg_used_dirs_count;	/* Directories count */
	__u16	bg_pad;			/* Padding */
	__u32	bg_reserved[3];		/* Reserved */
};

/*
 * Macro-instructions used to manage group descriptors
 */
#ifdef __KERNEL__
# define EXT3_BLOCKS_PER_GROUP(s)	(EXT3_SB(s)->s_blocks_per_group)
# define EXT3_DESC_PER_BLOCK(s)		(EXT3_SB(s)->s_desc_per_block)
# define EXT3_INODES_PER_GROUP(s)	(EXT3_SB(s)->s_inodes_per_group)
# define EXT3_DESC_PER_BLOCK_BITS(s)	(EXT3_SB(s)->s_desc_per_block_bits)
#else
# define EXT3_BLOCKS_PER_GROUP(s)	((s)->s_blocks_per_group)
# define EXT3_DESC_PER_BLOCK(s)		(EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_group_desc))
# define EXT3_INODES_PER_GROUP(s)	((s)->s_inodes_per_group)
#endif

/*
 * Constants relative to the data blocks
 */
#define	EXT3_NDIR_BLOCKS		12
#define	EXT3_IND_BLOCK			EXT3_NDIR_BLOCKS
#define	EXT3_DIND_BLOCK			(EXT3_IND_BLOCK + 1)
#define	EXT3_TIND_BLOCK			(EXT3_DIND_BLOCK + 1)
#define	EXT3_N_BLOCKS			(EXT3_TIND_BLOCK + 1)

/*
 * Inode flags
 */
#define	EXT3_SECRM_FL			0x00000001 /* Secure deletion */
#define	EXT3_UNRM_FL			0x00000002 /* Undelete */
#define	EXT3_COMPR_FL			0x00000004 /* Compress file */
#define EXT3_SYNC_FL			0x00000008 /* Synchronous updates */
#define EXT3_IMMUTABLE_FL		0x00000010 /* Immutable file */
#define EXT3_APPEND_FL			0x00000020 /* writes to file may only append */
#define EXT3_NODUMP_FL			0x00000040 /* do not dump file */
#define EXT3_NOATIME_FL			0x00000080 /* do not update atime */
/* Reserved for compression usage... */
#define EXT3_DIRTY_FL			0x00000100
#define EXT3_COMPRBLK_FL		0x00000200 /* One or more compressed clusters */
#define EXT3_NOCOMPR_FL			0x00000400 /* Don't compress */
#define EXT3_ECOMPR_FL			0x00000800 /* Compression error */
/* End compression flags --- maybe not all used */
#define EXT3_INDEX_FL			0x00001000 /* hash-indexed directory */
#define EXT3_IMAGIC_FL			0x00002000 /* AFS directory */
#define EXT3_JOURNAL_DATA_FL		0x00004000 /* file data should be journaled */
#define EXT3_NOTAIL_FL			0x00008000 /* file tail should not be merged */
#define EXT3_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
#define EXT3_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
#define EXT3_RESERVED_FL		0x80000000 /* reserved for ext3 lib */

#define EXT3_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
#define EXT3_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */

/*
 * Inode dynamic state flags
 */
#define EXT3_STATE_JDATA		0x00000001 /* journaled data exists */
#define EXT3_STATE_NEW			0x00000002 /* inode is newly created */

/*
 * ioctl commands
 */
#define	EXT3_IOC_GETFLAGS		_IOR('f', 1, long)
#define	EXT3_IOC_SETFLAGS		_IOW('f', 2, long)
#define	EXT3_IOC_GETVERSION		_IOR('f', 3, long)
#define	EXT3_IOC_SETVERSION		_IOW('f', 4, long)
#define	EXT3_IOC_GETVERSION_OLD		_IOR('v', 1, long)
#define	EXT3_IOC_SETVERSION_OLD		_IOW('v', 2, long)
#ifdef CONFIG_JBD_DEBUG
#define EXT3_IOC_WAIT_FOR_READONLY	_IOR('f', 99, long)
#endif

/*
 * Structure of an inode on the disk
 */
struct ext3_inode {
	__u16	i_mode;		/* File mode */
	__u16	i_uid;		/* Low 16 bits of Owner Uid */
	__u32	i_size;		/* Size in bytes */
	__u32	i_atime;	/* Access time */
	__u32	i_ctime;	/* Creation time */
	__u32	i_mtime;	/* Modification time */
	__u32	i_dtime;	/* Deletion Time */
	__u16	i_gid;		/* Low 16 bits of Group Id */
	__u16	i_links_count;	/* Links count */
	__u32	i_blocks;	/* Blocks count */
	__u32	i_flags;	/* File flags */
	union {
		struct {
			__u32  l_i_reserved1;
		} linux1;
		struct {
			__u32  h_i_translator;
		} hurd1;
		struct {
			__u32  m_i_reserved1;
		} masix1;
	} osd1;				/* OS dependent 1 */
	__u32	i_block[EXT3_N_BLOCKS];/* Pointers to blocks */
	__u32	i_generation;	/* File version (for NFS) */
	__u32	i_file_acl;	/* File ACL */
	__u32	i_dir_acl;	/* Directory ACL */
	__u32	i_faddr;	/* Fragment address */
	union {
		struct {
			__u8	l_i_frag;	/* Fragment number */
			__u8	l_i_fsize;	/* Fragment size */
			__u16	i_pad1;
			__u16	l_i_uid_high;	/* these 2 fields    */
			__u16	l_i_gid_high;	/* were reserved2[0] */
			__u32	l_i_reserved2;
		} linux2;
		struct {
			__u8	h_i_frag;	/* Fragment number */
			__u8	h_i_fsize;	/* Fragment size */
			__u16	h_i_mode_high;
			__u16	h_i_uid_high;
			__u16	h_i_gid_high;
			__u32	h_i_author;
		} hurd2;
		struct {
			__u8	m_i_frag;	/* Fragment number */
			__u8	m_i_fsize;	/* Fragment size */
			__u16	m_pad1;
			__u32	m_i_reserved2[2];
		} masix2;
	} osd2;				/* OS dependent 2 */
};

/*
 * i_size_high is an alias for the i_dir_acl slot of struct ext3_inode.
 * NOTE(review): presumably the upper 32 bits of the file size -- confirm
 * against the users of i_size_high.
 */
#define i_size_high	i_dir_acl

/*
 * Short names for the OS dependent members of struct ext3_inode.
 *
 * Exactly one branch is selected by the predefined platform macros, so
 * that code can write `inode->i_frag' without knowing which union member
 * applies.  These are object-like macros that expand in the middle of
 * expressions, so none of them may end in a semicolon (the Hurd i_frag
 * and i_fsize definitions used to, which broke any use other than as a
 * complete statement).
 */
#if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1	osd1.linux1.l_i_reserved1
#define i_frag		osd2.linux2.l_i_frag
#define i_fsize		osd2.linux2.l_i_fsize
#define i_uid_low	i_uid
#define i_gid_low	i_gid
#define i_uid_high	osd2.linux2.l_i_uid_high
#define i_gid_high	osd2.linux2.l_i_gid_high
#define i_reserved2	osd2.linux2.l_i_reserved2

#elif defined(__GNU__)

#define i_translator	osd1.hurd1.h_i_translator
#define i_frag		osd2.hurd2.h_i_frag
#define i_fsize		osd2.hurd2.h_i_fsize
#define i_uid_high	osd2.hurd2.h_i_uid_high
#define i_gid_high	osd2.hurd2.h_i_gid_high
#define i_author	osd2.hurd2.h_i_author

#elif defined(__masix__)

#define i_reserved1	osd1.masix1.m_i_reserved1
#define i_frag		osd2.masix2.m_i_frag
#define i_fsize		osd2.masix2.m_i_fsize
#define i_reserved2	osd2.masix2.m_i_reserved2

#endif /* defined(__KERNEL__) || defined(__linux__) */

/*
 * File system states
 *
 * Independent bit flags.
 * NOTE(review): presumably the values kept in s_state of the super
 * block -- confirm against the code that reads s_state.
 */
#define	EXT3_VALID_FS			0x0001	/* Unmounted cleanly */
#define	EXT3_ERROR_FS			0x0002	/* Errors detected */
#define	EXT3_ORPHAN_FS			0x0004	/* Orphans being recovered */

/*
 * Mount flags
 *
 * Bits of s_mount_opt, normally accessed through test_opt(), set_opt()
 * and clear_opt(), which paste the part after EXT3_MOUNT_ onto the
 * prefix.
 *
 * The three data journaling modes are values inside the two-bit
 * EXT3_MOUNT_DATA_FLAGS field, not independent bits:
 * EXT3_MOUNT_WRITEBACK_DATA equals the mask itself, so a plain bit test
 * for JOURNAL_DATA or ORDERED_DATA is also non-zero in writeback mode.
 * Compare (flags & EXT3_MOUNT_DATA_FLAGS) against the wanted mode
 * instead.
 */
#define EXT3_MOUNT_CHECK		0x0001	/* Do mount-time checks */
#define EXT3_MOUNT_OLDALLOC		0x0002  /* Don't use the new Orlov allocator */
#define EXT3_MOUNT_GRPID		0x0004	/* Create files with directory's group */
#define EXT3_MOUNT_DEBUG		0x0008	/* Some debugging messages */
#define EXT3_MOUNT_ERRORS_CONT		0x0010	/* Continue on errors */
#define EXT3_MOUNT_ERRORS_RO		0x0020	/* Remount fs ro on errors */
#define EXT3_MOUNT_ERRORS_PANIC		0x0040	/* Panic on errors */
#define EXT3_MOUNT_MINIX_DF		0x0080	/* Mimics the Minix statfs */
#define EXT3_MOUNT_NOLOAD		0x0100	/* Don't use existing journal*/
#define EXT3_MOUNT_ABORT		0x0200	/* Fatal error detected */
#define EXT3_MOUNT_DATA_FLAGS		0x0C00	/* Mode for data writes: */
  #define EXT3_MOUNT_JOURNAL_DATA	0x0400	/* Write data to journal */
  #define EXT3_MOUNT_ORDERED_DATA	0x0800	/* Flush data before commit */
  #define EXT3_MOUNT_WRITEBACK_DATA	0x0C00	/* No data ordering */
#define EXT3_MOUNT_UPDATE_JOURNAL	0x1000	/* Update the journal format */
#define EXT3_MOUNT_NO_UID32		0x2000  /* Disable 32-bit UIDs */
#define EXT3_MOUNT_XATTR_USER		0x4000	/* Extended user attributes */
#define EXT3_MOUNT_POSIX_ACL		0x8000	/* POSIX Access Control Lists */

/*
 * Mount option helpers.
 *
 *   clear_opt(o, opt)  clear EXT3_MOUNT_<opt> in the lvalue o
 *   set_opt(o, opt)    set EXT3_MOUNT_<opt> in the lvalue o
 *   test_opt(sb, opt)  non-zero if EXT3_MOUNT_<opt> is set in
 *                      EXT3_SB(sb)->s_mount_opt
 *
 * `opt' is the flag name without the EXT3_MOUNT_ prefix.  Each macro
 * expands to one fully parenthesised expression, so it can be combined
 * with other operators without being re-associated (unparenthesised,
 * `set_opt(x, DEBUG) == v' would parse as `x |= (EXT3_MOUNT_DEBUG == v)').
 *
 * Compatibility, for having both ext2_fs.h and ext3_fs.h included at
 * once: when ext2_fs.h has already been included its helpers are kept,
 * and only the ext3-specific flags are given EXT2_MOUNT_ names so that
 * those helpers can reach them.
 */
#ifndef _LINUX_EXT2_FS_H
#define clear_opt(o, opt)		((o) &= ~EXT3_MOUNT_##opt)
#define set_opt(o, opt)			((o) |= EXT3_MOUNT_##opt)
#define test_opt(sb, opt)		(EXT3_SB(sb)->s_mount_opt & \
					 EXT3_MOUNT_##opt)
#else
#define EXT2_MOUNT_NOLOAD		EXT3_MOUNT_NOLOAD
#define EXT2_MOUNT_ABORT		EXT3_MOUNT_ABORT
#define EXT2_MOUNT_DATA_FLAGS		EXT3_MOUNT_DATA_FLAGS
#endif

/*
 * Bitmap helpers: ext3 has no implementation of its own and simply
 * renames the ext2_* bit operations, which must be provided elsewhere.
 */
#define ext3_set_bit			ext2_set_bit
#define ext3_set_bit_atomic		ext2_set_bit_atomic
#define ext3_clear_bit			ext2_clear_bit
#define ext3_clear_bit_atomic		ext2_clear_bit_atomic
#define ext3_test_bit			ext2_test_bit
#define ext3_find_first_zero_bit	ext2_find_first_zero_bit
#define ext3_find_next_zero_bit		ext2_find_next_zero_bit

/*
 * Maximal mount counts between two filesystem checks
 *
 * Defaults only; a check interval of 0 means that time based checking
 * is not used.
 */
#define EXT3_DFL_MAX_MNT_COUNT		20	/* Allow 20 mounts */
#define EXT3_DFL_CHECKINTERVAL		0	/* Don't use interval check */

/*
 * Behaviour when detecting errors
 *
 * These are plain enumeration values (1, 2, 3), not bit flags.
 * NOTE(review): presumably the values kept in s_errors of the super
 * block -- confirm against the code that reads s_errors.
 */
#define EXT3_ERRORS_CONTINUE		1	/* Continue execution */
#define EXT3_ERRORS_RO			2	/* Remount fs read-only */
#define EXT3_ERRORS_PANIC		3	/* Panic */
#define EXT3_ERRORS_DEFAULT		EXT3_ERRORS_CONTINUE

/*
 * Structure of the super block
 *
 * The hexadecimal markers in the left margin are byte offsets from the
 * start of the structure.  The fields add up to 0x108 bytes and
 * s_reserved[190] pads the structure to exactly 1024 (0x400) bytes.
 * Multi-byte fields are read through STOH32() by the feature test
 * macros, i.e. they are kept in on-disk byte order.
 */
struct ext3_super_block {
/*00*/	__u32	s_inodes_count;		/* Inodes count */
	__u32	s_blocks_count;		/* Blocks count */
	__u32	s_r_blocks_count;	/* Reserved blocks count */
	__u32	s_free_blocks_count;	/* Free blocks count */
/*10*/	__u32	s_free_inodes_count;	/* Free inodes count */
	__u32	s_first_data_block;	/* First Data Block */
	__u32	s_log_block_size;	/* Block size */
	__s32	s_log_frag_size;	/* Fragment size */
/*20*/	__u32	s_blocks_per_group;	/* # Blocks per group */
	__u32	s_frags_per_group;	/* # Fragments per group */
	__u32	s_inodes_per_group;	/* # Inodes per group */
	__u32	s_mtime;		/* Mount time */
/*30*/	__u32	s_wtime;		/* Write time */
	__u16	s_mnt_count;		/* Mount count */
	__s16	s_max_mnt_count;	/* Maximal mount count */
	__u16	s_magic;		/* Magic signature */
	__u16	s_state;		/* File system state */
	__u16	s_errors;		/* Behaviour when detecting errors */
	__u16	s_minor_rev_level;	/* minor revision level */
/*40*/	__u32	s_lastcheck;		/* time of last check */
	__u32	s_checkinterval;	/* max. time between checks */
	__u32	s_creator_os;		/* OS */
	__u32	s_rev_level;		/* Revision level */
/*50*/	__u16	s_def_resuid;		/* Default uid for reserved blocks */
	__u16	s_def_resgid;		/* Default gid for reserved blocks */
	/*
	 * These fields are for EXT3_DYNAMIC_REV superblocks only.
	 *
	 * Note: the difference between the compatible feature set and
	 * the incompatible feature set is that if there is a bit set
	 * in the incompatible feature set that the kernel doesn't
	 * know about, it should refuse to mount the filesystem.
	 *
	 * e2fsck's requirements are more strict; if it doesn't know
	 * about a feature in either the compatible or incompatible
	 * feature set, it must abort and not try to meddle with
	 * things it doesn't understand...
	 */
/*54*/	__u32	s_first_ino;		/* First non-reserved inode */
	__u16   s_inode_size;		/* size of inode structure */
	__u16	s_block_group_nr;	/* block group # of this superblock */
	__u32	s_feature_compat;	/* compatible feature set */
/*60*/	__u32	s_feature_incompat;	/* incompatible feature set */
	__u32	s_feature_ro_compat;	/* readonly-compatible feature set */
/*68*/	__u8	s_uuid[16];		/* 128-bit uuid for volume */
/*78*/	char	s_volume_name[16];	/* volume name */
/*88*/	char	s_last_mounted[64];	/* directory where last mounted */
/*C8*/	__u32	s_algorithm_usage_bitmap; /* For compression */
	/*
	 * Performance hints.  Directory preallocation should only
	 * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on.
	 */
	__u8	s_prealloc_blocks;	/* Nr of blocks to try to preallocate*/
	__u8	s_prealloc_dir_blocks;	/* Nr to preallocate for dirs */
	__u16	s_padding1;
	/*
	 * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
	 */
/*D0*/	__u8	s_journal_uuid[16];	/* uuid of journal superblock */
/*E0*/	__u32	s_journal_inum;		/* inode number of journal file */
	__u32	s_journal_dev;		/* device number of journal file */
	__u32	s_last_orphan;		/* start of list of inodes to delete */
/*EC*/	__u32	s_hash_seed[4];		/* HTREE hash seed */
/*FC*/	__u8	s_def_hash_version;	/* Default hash version to use */
	__u8	s_reserved_char_pad;
	__u16	s_reserved_word_pad;
/*100*/	__u32	s_default_mount_opts;	/* EXT3_DEFM_* bits */
	__u32	s_first_meta_bg; 	/* First metablock block group */
/*108*/	__u32	s_reserved[190];	/* Padding to the end of the block */
};

#ifdef __KERNEL__
/* Return the s_fs_info pointer of a VFS super block as ext3_sb_info. */
static inline struct ext3_sb_info * EXT3_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}
/*
 * Return the ext3_inode_info that contains the given VFS inode as its
 * vfs_inode member.
 */
static inline struct ext3_inode_info *EXT3_I(struct inode *inode)
{
	return container_of(inode, struct ext3_inode_info, vfs_inode);
}
#else
/* Assume that user mode programs are passing in an ext3fs superblock, not
 * a kernel struct super_block.  This will allow us to call the feature-test
 * macros from user land. */
#define EXT3_SB(sb)	(sb)
#endif

/*
 * The i_dtime field of the in-memory inode info; usable as an lvalue.
 * NOTE(review): the name suggests i_dtime doubles as the link to the
 * next orphan inode -- confirm.  EXT3_I() is only defined under
 * __KERNEL__ in this header, so any other build that uses NEXT_ORPHAN
 * must provide EXT3_I() itself.
 */
#define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime

/*
 * Codes for operating systems
 *
 * NOTE(review): presumably the values kept in s_creator_os of the super
 * block -- confirm.
 */
#define EXT3_OS_LINUX		0
#define EXT3_OS_HURD		1
#define EXT3_OS_MASIX		2
#define EXT3_OS_FREEBSD		3
#define EXT3_OS_LITES		4

/*
 * Revision levels
 *
 * EXT3_CURRENT_REV is still the original format (0), while
 * EXT3_MAX_SUPP_REV, the highest revision supported, is the dynamic
 * format (1).
 */
#define EXT3_GOOD_OLD_REV	0	/* The good old (original) format */
#define EXT3_DYNAMIC_REV	1	/* V2 format w/ dynamic inode sizes */

#define EXT3_CURRENT_REV	EXT3_GOOD_OLD_REV
#define EXT3_MAX_SUPP_REV	EXT3_DYNAMIC_REV

/* Inode size, in bytes, of the original (EXT3_GOOD_OLD_REV) format. */
#define EXT3_GOOD_OLD_INODE_SIZE 128

/*
 * Feature set definitions
 *
 * EXT3_HAS_*_FEATURE(sb, mask)    non-zero if any bit of mask is set
 * EXT3_SET_*_FEATURE(sb, mask)    set the bits of mask
 * EXT3_CLEAR_*_FEATURE(sb, mask)  clear the bits of mask
 *
 * `sb' is whatever EXT3_SB() accepts; the field touched is
 * s_feature_compat, s_feature_ro_compat or s_feature_incompat.  Every
 * macro expands to one parenthesised expression, so the SET and CLEAR
 * forms can be used inside larger expressions without their assignment
 * being re-associated.
 *
 * NOTE(review): the HAS macros convert the stored field with STOH32()
 * before testing it, but the SET and CLEAR macros write `mask' into the
 * field unconverted.  If STOH32() is not the identity on the target,
 * SET/CLEAR need the matching host-to-storage conversion -- confirm.
 */

#define EXT3_HAS_COMPAT_FEATURE(sb,mask)			\
	( STOH32(EXT3_SB(sb)->s_feature_compat) & (mask) )
#define EXT3_HAS_RO_COMPAT_FEATURE(sb,mask)			\
	( STOH32(EXT3_SB(sb)->s_feature_ro_compat) & (mask) )
#define EXT3_HAS_INCOMPAT_FEATURE(sb,mask)			\
	( STOH32(EXT3_SB(sb)->s_feature_incompat) & (mask) )
#define EXT3_SET_COMPAT_FEATURE(sb,mask)			\
	( EXT3_SB(sb)->s_feature_compat |= (mask) )
#define EXT3_SET_RO_COMPAT_FEATURE(sb,mask)			\
	( EXT3_SB(sb)->s_feature_ro_compat |= (mask) )
#define EXT3_SET_INCOMPAT_FEATURE(sb,mask)			\
	( EXT3_SB(sb)->s_feature_incompat |= (mask) )
#define EXT3_CLEAR_COMPAT_FEATURE(sb,mask)			\
	( EXT3_SB(sb)->s_feature_compat &= ~(mask) )
#define EXT3_CLEAR_RO_COMPAT_FEATURE(sb,mask)			\
	( EXT3_SB(sb)->s_feature_ro_compat &= ~(mask) )
#define EXT3_CLEAR_INCOMPAT_FEATURE(sb,mask)			\
	( EXT3_SB(sb)->s_feature_incompat &= ~(mask) )

/* Compatible features: bits of s_feature_compat. */
#define EXT3_FEATURE_COMPAT_DIR_PREALLOC	0x0001
#define EXT3_FEATURE_COMPAT_IMAGIC_INODES	0x0002
#define EXT3_FEATURE_COMPAT_HAS_JOURNAL		0x0004
#define EXT3_FEATURE_COMPAT_EXT_ATTR		0x0008
#define EXT3_FEATURE_COMPAT_RESIZE_INODE	0x0010
#define EXT3_FEATURE_COMPAT_DIR_INDEX		0x0020

/* Read-only compatible features: bits of s_feature_ro_compat. */
#define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER	0x0001
#define EXT3_FEATURE_RO_COMPAT_LARGE_FILE	0x0002
#define EXT3_FEATURE_RO_COMPAT_BTREE_DIR	0x0004

/* Incompatible features: bits of s_feature_incompat. */
#define EXT3_FEATURE_INCOMPAT_COMPRESSION	0x0001
#define EXT3_FEATURE_INCOMPAT_FILETYPE		0x0002
#define EXT3_FEATURE_INCOMPAT_RECOVER		0x0004 /* Needs recovery */
#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV	0x0008 /* Journal device */
#define EXT3_FEATURE_INCOMPAT_META_BG		0x0010

/*
 * Feature bits this implementation supports, one mask per feature set.
 * EXT3_FEATURE_COMPAT_SUPP is spelled with the EXT3_ constant defined
 * above; the EXT2_ spelling only compiled when ext2_fs.h happened to be
 * included as well.
 */
#define EXT3_FEATURE_COMPAT_SUPP	EXT3_FEATURE_COMPAT_EXT_ATTR
#define EXT3_FEATURE_INCOMPAT_SUPP	(EXT3_FEATURE_INCOMPAT_FILETYPE| \
					 EXT3_FEATURE_INCOMPAT_RECOVER| \
					 EXT3_FEATURE_INCOMPAT_META_BG)
#define EXT3_FEATURE_RO_COMPAT_SUPP	(EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
					 EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
					 EXT3_FEATURE_RO_COMPAT_BTREE_DIR)

/*
 * Default values for user and/or group using reserved blocks
 */
#define	EXT3_DEF_RESUID		0
#define	EXT3_DEF_RESGID		0

/*
 * Default mount options
 *
 * EXT3_DEFM_JMODE is a two-bit field mask (0x0060).  The three
 * EXT3_DEFM_JMODE_* values are encodings inside that field, not
 * independent bits: JMODE_WBACK equals the mask itself.
 * NOTE(review): presumably the bits kept in s_default_mount_opts of the
 * super block -- confirm.
 */
#define EXT3_DEFM_DEBUG		0x0001
#define EXT3_DEFM_BSDGROUPS	0x0002
#define EXT3_DEFM_XATTR_USER	0x0004
#define EXT3_DEFM_ACL		0x0008
#define EXT3_DEFM_UID16		0x0010
#define EXT3_DEFM_JMODE		0x0060
#define EXT3_DEFM_JMODE_DATA	0x0020
#define EXT3_DEFM_JMODE_ORDERED	0x0040
#define EXT3_DEFM_JMODE_WBACK	0x0060

/*
 * Structure of a directory entry
 *
 * Both variants have an 8 byte header (inode, rec_len and two bytes of
 * name length / file type) followed by the name.  name[] is declared
 * with the maximum length.
 * NOTE(review): entries on disk are presumably only rec_len bytes long
 * (see EXT3_DIR_REC_LEN), so sizeof() of these structures should not be
 * used as the record size -- confirm.
 */
#define EXT3_NAME_LEN 255

struct ext3_dir_entry {
	__u32	inode;			/* Inode number */
	__u16	rec_len;		/* Directory entry length */
	__u16	name_len;		/* Name length */
	char	name[EXT3_NAME_LEN];	/* File name */
};

/*
 * The new version of the directory entry.  Since EXT3 structures are
 * stored in Intel (little-endian) byte order, and the name_len field
 * could never be bigger than 255 chars, it's safe to reclaim the extra
 * (high) byte of the old 16-bit name_len for the file_type field.
 */
struct ext3_dir_entry_2 {
	__u32	inode;			/* Inode number */
	__u16	rec_len;		/* Directory entry length */
	__u8	name_len;		/* Name length */
	__u8	file_type;		/* One of the EXT3_FT_* values */
	char	name[EXT3_NAME_LEN];	/* File name */
};

/*
 * Ext3 directory file types.  Only the low 3 bits are used.  The
 * other bits are reserved for now.
 *
 * These are consecutive enumeration values, not bit flags.
 * EXT3_FT_MAX is one more than the largest defined type, i.e. the
 * number of defined values.
 */
#define EXT3_FT_UNKNOWN		0
#define EXT3_FT_REG_FILE	1
#define EXT3_FT_DIR		2
#define EXT3_FT_CHRDEV		3
#define EXT3_FT_BLKDEV		4
#define EXT3_FT_FIFO		5
#define EXT3_FT_SOCK		6
#define EXT3_FT_SYMLINK		7

#define EXT3_FT_MAX		8

/*
 * Directory entry alignment.
 *
 * EXT3_DIR_PAD is the boundary every directory entry starts on; it must
 * be a multiple of 4.  EXT3_DIR_ROUND is the matching low-bit mask.
 *
 * EXT3_DIR_REC_LEN(len) gives the record length needed for a name of
 * `len' bytes: the 8 byte entry header plus the name, rounded up to the
 * next EXT3_DIR_PAD boundary.
 */
#define EXT3_DIR_PAD			4
#define EXT3_DIR_ROUND			(EXT3_DIR_PAD - 1)
#define EXT3_DIR_REC_LEN(len) \
	(((len) + 8 + EXT3_DIR_ROUND) & ~EXT3_DIR_ROUND)
/*
 * Hash Tree Directory indexing
 * (c) Daniel Phillips, 2001
 *
 * is_dx(dir)                non-zero if `dir' is an indexed directory:
 *                           the file system has the DIR_INDEX feature
 *                           and the inode carries EXT3_INDEX_FL.  Always
 *                           0 when CONFIG_EXT3_INDEX is not defined.
 * EXT3_DIR_LINK_MAX(dir)    true if `dir' has reached EXT3_LINK_MAX
 *                           links; never true for an indexed directory.
 * EXT3_DIR_LINK_EMPTY(dir)  true if the link count is 2; with
 *                           CONFIG_EXT3_INDEX a count of 1 is accepted
 *                           as well.
 *
 * The `dir' argument is parenthesised at every use so that any pointer
 * expression can be passed, not just a plain identifier.
 */

#ifdef CONFIG_EXT3_INDEX
  #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE((dir)->i_sb, \
					      EXT3_FEATURE_COMPAT_DIR_INDEX) && \
		      (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
#else
  #define is_dx(dir) 0
#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
#endif

/* Legal values for the dx_root hash_version field: */

#define DX_HASH_LEGACY		0
#define DX_HASH_HALF_MD4	1
#define DX_HASH_TEA		2

/*
 * hash info structure used by the directory hash
 *
 * Passed to ext3fs_dirhash().
 * NOTE(review): presumably hash_version and seed are inputs and
 * hash/minor_hash are outputs -- confirm against hash.c.
 * NOTE(review): this structure uses u32 where the rest of the header
 * uses __u32, and it is not inside #ifdef __KERNEL__ -- confirm that
 * u32 is defined for every build that includes this header.
 */
struct dx_hash_info
{
	u32		hash;
	u32		minor_hash;
	int		hash_version;	/* one of the DX_HASH_* values */
	u32		*seed;
};

/* NOTE(review): presumably the hash value marking the end of an htree
 * directory -- confirm. */
#define EXT3_HTREE_EOF	0x7fffffff

#ifdef __KERNEL__
/*
 * Control parameters used by ext3_htree_next_block
 */
#define HASH_NB_ALWAYS		1


/*
 * Describe an inode's exact location on disk and in memory
 *
 * bh is the buffer that holds the inode, offset is the byte offset of
 * the inode inside bh->b_data (see ext3_raw_inode()), and block_group
 * names a block group.
 * NOTE(review): presumably the block group the inode belongs to --
 * confirm.
 */
struct ext3_iloc
{
	struct buffer_head *bh;
	unsigned long offset;
	unsigned long block_group;
};

/*
 * Return a pointer to the on-disk inode described by iloc, i.e. the
 * address iloc->offset bytes into the data of iloc->bh.  No checks are
 * made: iloc and iloc->bh must be valid.
 */
static inline struct ext3_inode *ext3_raw_inode(struct ext3_iloc *iloc)
{
	return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset);
}

/*
 * This structure is stuffed into the struct file's private_data field
 * for directories.  It is where we put information so that we can do
 * readdir operations in hash tree order.
 *
 * It is released with ext3_htree_free_dir_info().
 * NOTE(review): the member notes below are inferred from the names
 * only -- confirm against dir.c.
 */
struct dir_private_info {
	struct rb_root	root;		/* presumably: tree of cached names */
	struct rb_node	*curr_node;	/* presumably: current position in root */
	struct fname	*extra_fname;
	loff_t		last_pos;
	__u32		curr_hash;
	__u32		curr_minor_hash;
	__u32		next_hash;
};

/*
 * Special error return code only used by dx_probe() and its callers.
 * The value is negative and unparenthesised.
 */
#define ERR_BAD_DX_DIR	-75000

/*
 * Function prototypes
 */

/*
 * Ok, these declarations are also in <linux/kernel.h> but none of the
 * ext3 source programs needs to include it so they are duplicated here.
 *
 * NORET_TYPE expands to nothing and only marks a declaration for the
 * reader.  ATTRIB_NORET is a complete noreturn attribute.  NORET_AND
 * expands to `noreturn,' and is meant to be placed first inside an
 * existing __attribute__ ((...)) list, as the ext3_panic() prototype
 * does.
 */
# define NORET_TYPE    /**/
# define ATTRIB_NORET  __attribute__((noreturn))
# define NORET_AND     noreturn,

/*
 * Declarations only.  Each group below is introduced by the name of the
 * source file that is expected to provide the definitions.
 */

/* balloc.c */
extern int ext3_bg_has_super(struct super_block *sb, int group);
extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
extern int ext3_new_block (handle_t *, struct inode *, unsigned long,
					    __u32 *, __u32 *, int *);
extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
			      unsigned long);
extern unsigned long ext3_count_free_blocks (struct super_block *);
/* Unlike ext3_check_inodes_bitmap(), this one also takes an
 * ext3fs_rpc_context. */
extern void ext3_check_blocks_bitmap (struct super_block *,
				      struct ext3fs_rpc_context *rpc_context);
extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
						    unsigned int block_group,
						    struct buffer_head ** bh);

/* dir.c */
extern int ext3_check_dir_entry(const char *, struct inode *,
				struct ext3_dir_entry_2 *,
				struct buffer_head *, unsigned long);
extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
				    __u32 minor_hash,
				    struct ext3_dir_entry_2 *dirent);
extern void ext3_htree_free_dir_info(struct dir_private_info *p);

/* fsync.c */
extern int ext3_sync_file (struct file *, struct dentry *, int);

/* hash.c */
extern int ext3fs_dirhash(const char *name, int len, struct
			  dx_hash_info *hinfo);

/* ialloc.c */
extern struct inode * ext3_new_inode (handle_t *, struct inode *, int);
extern void ext3_free_inode (handle_t *, struct inode *);
extern struct inode * ext3_orphan_get (struct super_block *, unsigned long);
extern unsigned long ext3_count_free_inodes (struct super_block *);
extern unsigned long ext3_count_dirs (struct super_block *);
extern void ext3_check_inodes_bitmap (struct super_block *);
extern unsigned long ext3_count_free (struct buffer_head *, unsigned);


/* inode.c */
extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);

extern void ext3_read_inode (struct inode *);
extern void ext3_write_inode (struct inode *, int);
extern int  ext3_setattr (struct dentry *, struct iattr *);
extern void ext3_put_inode (struct inode *);
extern void ext3_delete_inode (struct inode *);
extern int  ext3_sync_inode (handle_t *, struct inode *);
extern void ext3_discard_prealloc (struct inode *);
extern void ext3_dirty_inode(struct inode *);
extern int ext3_change_inode_journal_flag(struct inode *, int);
extern void ext3_truncate (struct inode *);
extern void ext3_set_inode_flags(struct inode *);
extern void ext3_set_aops(struct inode *inode);

/* ioctl.c */
extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
		       unsigned long);

/* namei.c */
extern int ext3_orphan_add(handle_t *, struct inode *);
extern int ext3_orphan_del(handle_t *, struct inode *);
extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
				__u32 start_minor_hash, __u32 *next_hash);

/* super.c */
/*
 * The four reporting functions below take a super block, a string and a
 * printf-style format followed by its arguments; the format attribute
 * lets the compiler check the third parameter against the variadic
 * arguments that start at the fourth.  ext3_panic() is additionally
 * declared noreturn through NORET_AND.
 * NOTE(review): the second parameter is presumably the name of the
 * calling function -- confirm against super.c.
 */
extern void ext3_error (struct super_block *, const char *, const char *, ...)
	__attribute__ ((format (printf, 3, 4)));
extern void __ext3_std_error (struct super_block *, const char *, int);
extern void ext3_abort (struct super_block *, const char *, const char *, ...)
	__attribute__ ((format (printf, 3, 4)));
extern NORET_TYPE void ext3_panic (struct super_block *, const char *,
				   const char *, ...)
	__attribute__ ((NORET_AND format (printf, 3, 4)));
extern void ext3_warning (struct super_block *, const char *, const char *, ...)
	__attribute__ ((format (printf, 3, 4)));
extern void ext3_update_dynamic_rev (struct super_block *sb);
extern void ext3_put_super (struct super_block *);
extern void ext3_write_super (struct super_block *);
extern void ext3_write_super_lockfs (struct super_block *);
extern void ext3_unlockfs (struct super_block *);
extern int ext3_remount (struct super_block *, int *, char *);
extern int ext3_statfs (struct super_block *, struct kstatfs *);

/*
 * ext3_std_error(sb, errnum): report errnum through __ext3_std_error(),
 * passing the name of the calling function, but only when errnum is
 * non-zero.  errnum is evaluated twice in that case.  The do/while (0)
 * wrapper makes the macro behave as a single statement.
 *
 * The parameter is deliberately not called `errno': <errno.h> defines
 * errno as a macro, and in the ext3_decode_error() prototype below that
 * macro would be expanded and silently change the declared parameter
 * type.
 */
#define ext3_std_error(sb, errnum)				\
do {								\
	if ((errnum))						\
		__ext3_std_error((sb), __FUNCTION__, (errnum));	\
} while (0)
/*
 * Return a string for the error code errnum.  nbuf is a caller supplied
 * buffer of 16 bytes.
 * NOTE(review): presumably used as scratch space for codes that have no
 * fixed message -- confirm against super.c.
 */
extern const char *ext3_decode_error(struct super_block *sb, int errnum, char nbuf[16]);

/*
 * Inodes and files operations
 *
 * Operation tables, declared here and expected to be defined in the
 * source file named above each group.
 */

/* dir.c */
extern struct file_operations ext3_dir_operations;

/* file.c */
extern struct inode_operations ext3_file_inode_operations;
extern struct file_operations ext3_file_operations;

/* namei.c */
extern struct inode_operations ext3_dir_inode_operations;
extern struct inode_operations ext3_special_inode_operations;

/* symlink.c */
extern struct inode_operations ext3_symlink_inode_operations;
extern struct inode_operations ext3_fast_symlink_inode_operations;


#endif	/* __KERNEL__ */

#endif	/* _LINUX_EXT3_FS_H */
/*
 *  linux/include/linux/ext3_fs_i.h
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/include/linux/minix_fs_i.h
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#ifndef _LINUX_EXT3_FS_I
#define _LINUX_EXT3_FS_I

/*
 * ext3 file system inode data in memory
 *
 * Several members only exist under a configuration macro
 * (EXT3_FRAGMENTS, EXT3_PREALLOCATE, CONFIG_EXT3_FS_XATTR,
 * CONFIG_EXT3_FS_POSIX_ACL), so every file that uses this structure
 * must be compiled with the same settings.
 *
 * NOTE(review): the Hurd-specific parts test __hurd__, while the inode
 * accessor macros in ext3_fs.h test __GNU__.  Confirm that the build
 * defines __hurd__; if it does not, the #ifndef __hurd__ members
 * (i_orphan, truncate_sem, vfs_inode) are compiled in and i_high_size
 * is left out.
 */
struct ext3_inode_info {
	__u32	i_data[15];
	__u32	i_flags;
#ifdef EXT3_FRAGMENTS
	__u32	i_faddr;
	__u8	i_frag_no;
	__u8	i_frag_size;
#endif
	__u32	i_file_acl;
	__u32	i_dir_acl;
	__u32	i_dtime;

	/*
	 * i_block_group is the number of the block group which contains
	 * this file's inode.  Constant across the lifetime of the inode,
	 * it is used for making block allocation decisions - we try to
	 * place a file's data blocks near its inode block, and new inodes
	 * near to their parent directory's inode.
	 */
	__u32	i_block_group;
	__u32	i_state;		/* Dynamic state flags for ext3 */

	/*
	 * i_next_alloc_block is the logical (file-relative) number of the
	 * most-recently-allocated block in this file.  Yes, it is misnamed.
	 * We use this for detecting linearly ascending allocation requests.
	 */
	__u32	i_next_alloc_block;

	/*
	 * i_next_alloc_goal is the *physical* companion to i_next_alloc_block.
	 * It is the physical block number of the block which was most-recently
	 * allocated to this file.  This gives us the goal (target) for the next
	 * allocation when we detect linearly ascending requests.
	 */
	__u32	i_next_alloc_goal;
#ifdef EXT3_PREALLOCATE
	__u32	i_prealloc_block;
	__u32	i_prealloc_count;
#endif
	__u32	i_dir_start_lookup;
#ifdef CONFIG_EXT3_FS_XATTR
	/*
	 * Extended attributes can be read independently of the main file
	 * data. Taking i_sem even when reading would cause contention
	 * between readers of EAs and writers of regular file data, so
	 * instead we synchronize on xattr_sem when reading or changing
	 * EAs.
	 */
	struct rw_semaphore xattr_sem;
#endif
#ifdef CONFIG_EXT3_FS_POSIX_ACL
	struct posix_acl	*i_acl;
	struct posix_acl	*i_default_acl;
#endif

#ifndef __hurd__
	struct list_head i_orphan;	/* unlinked but open inodes */
#endif /* !__hurd__ */

	/*
	 * i_disksize keeps track of what the inode size is ON DISK, not
	 * in memory.  During truncate, i_size is set to the new size by
	 * the VFS prior to calling ext3_truncate(), but the filesystem won't
	 * set i_disksize to 0 until the truncate is actually under way.
	 *
	 * The intent is that i_disksize always represents the blocks which
	 * are used by this file.  This allows recovery to restart truncate
	 * on orphans if we crash during truncate.  We actually write i_disksize
	 * into the on-disk inode when writing inodes out, instead of i_size.
	 *
	 * The only time when i_disksize and i_size may be different is when
	 * a truncate is in progress.  The only things which change i_disksize
	 * are ext3_get_block (growth) and ext3_truncate (shrinkth).
	 */
	loff_t	i_disksize;

#ifndef __hurd__
	/*
	 * truncate_sem is for serialising ext3_truncate() against
	 * ext3_getblock().  In the 2.4 ext2 design, great chunks of inode's
	 * data tree are chopped off during truncate. We can't do that in
	 * ext3 because whenever we perform intermediate commits during
	 * truncate, the inode and all the metadata blocks *must* be in a
	 * consistent state which allows truncation of the orphans to restart
	 * during recovery.  Hence we must fix the get_block-vs-truncate race
	 * by other means, so we have truncate_sem.
	 */
	struct semaphore truncate_sem;
	struct inode vfs_inode;		/* EXT3_I() maps this back to the container */
#endif /* !__hurd__ */

#ifdef __hurd__
	/* NOTE(review): presumably the upper 32 bits of the file size -- confirm */
	__u32	i_high_size;
#endif /* __hurd__ */
};

#endif	/* _LINUX_EXT3_FS_I */