From ec0e559f9d748abdcf0f6989cdec9d50f8e36ff7 Mon Sep 17 00:00:00 2001 From: Camilla Berglund Date: Fri, 13 Jan 2006 15:53:14 +0000 Subject: [PATCH] Corrected year (again), updates. --- ChangeLog | 12 ++++++--- README | 75 ++++++++++++++++++------------------------------------- TODO | 11 ++++---- duff.c | 26 +++++++++---------- duff.h | 8 +++--- 5 files changed, 54 insertions(+), 78 deletions(-) diff --git a/ChangeLog b/ChangeLog index adc2d58..79983e0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,10 @@ Change log for duff Started as of version 0.3.1. +2006-01-11 Camilla Berglund + + * duffdriver.c (stat_file): Added parenthesis to remove warning. + 2006-01-09 Camilla Berglund * duff.h (copy_entry): Removed unused function. @@ -14,9 +18,9 @@ Started as of version 0.3.1. * join-duplicates.sh: Removed logging, added some error checking and mktemp. -2006-01-08 +2006-01-08 Camilla Berglund - * join-duplicates.sh: Added file. + * join-duplicates.sh: Added file (credits to "Snow"). 2006-01-07 Camilla Berglund @@ -44,8 +48,8 @@ Started as of version 0.3.1. 2006-01-03 Camilla Berglund - * duff.h: Added inode and device fields to struct Entry (credits to Snow). - * duffdriver.c (process_path): Implemented physical mode (credits to Snow). + * duff.h: Added inode and device fields to struct Entry (credits to "Snow"). + * duffdriver.c (process_path): Implemented physical mode (credits to "Snow"). * duff.c (main, usage): Added -p option for physical mode. * duff.h: Added backlink to struct Entry. diff --git a/README b/README index 6d346db..692864c 100644 --- a/README +++ b/README @@ -36,7 +36,7 @@ The version numbering scheme for duff is as follows: 1. License and copyright ======================== -Duff is copyright (c) 2006 Camilla Berglund +Duff is copyright (c) 2005 Camilla Berglund Duff is licensed under the zlib/libpng license. See the file `COPYING' for license details. The license is also included at the top of each source file. 
@@ -64,11 +64,13 @@ magic should be required. If it is, then that's a bug and should be reported. This release of duff has been successfully built on the following systems: + Arch Linux x86 Darwin 7.9.0 powerpc Debian Etch powerpc Debian Sarge alpha FreeBSD 4.11 x86 FreeBSD 5.4 x86 + NetBSD 1.6.1 sparc SunOS 5.9 sparc64 Ubuntu Breezy x86 @@ -82,7 +84,8 @@ Earlier releases have been successfully built on the following systems: FreeBSD 5.4 x86 SunOS 5.9 sparc64 -However, it should build on most Unix systems without modifications. +The tools used were gcc and GNU or BSD make. However, it should build on most +Unix systems without modifications. 4. Installing Duff @@ -100,6 +103,10 @@ To read the manpage before installation, use the following command: groff -mdoc -Tascii duff.1 | less -R +On Linux systems, however, the following command may suffice: + + man -l duff.1 + 6. Hacking Duff =============== @@ -107,48 +114,7 @@ To read the manpage before installation, use the following command: See the file `HACKING'. -7. Frequently asked questions -============================= - -Q: Hey, that is O(n^2) right? -A: Well, sort of. Lots of very intelligent people keep telling me it's - O(n log n), and present complex solutions that, when implemented, don't give - any significant advantage in real world use. Thus, my particular - implementation is still O(n^2). - - I won't even pretend to call this a benchmark, but duff running cold on a - directory tree with over 2000 images (190MB) found the 13 duplicate clusters - in 4 seconds, and this on my ageing laptop. - -Q: How does it work, then? -A: The basic idea (as of version 0.3) is: - - * Only compare files if they're of equal size. - * Compare a few bytes before checksumming large files. - * Compare checksums before actual contents. - * Don't compare actual contents unless explicitly asked. - -Q: How do you calculate the checksum? -A: It is a regular SHA1 message digest, calculated using sha1-asaddi. - -Q: What is it good for? 
-A: Getting a list of, and then usually removing or joining, duplicates in a - given set of files. Note that duff itself never modifies any files, but it's - designed to play nice with tools that do. - -Q: Is duff named after Tom Duff? -A: As much as I like Duff's device, no, it isn't. Duff stands for DUplicate - File Finder. - - (No, it's not named after the beer, either.) - -Q: Shouldn't duff also do `x'? -A: I don't know, but you're welcome to write a patch to make it do `x', or send - me an email explaining why you think duff should do `x'. If I like your - patch or email, a future version of duff will probably do `x'. - - -8. Bugs, feedback and patches +7. Bugs, feedback and patches ============================= Please send bug reports, feedback, patches and cookies to: @@ -158,22 +124,29 @@ For more involved discussions, please join the mailing list: http://lists.sourceforge.net/lists/listinfo/duff-devel -9. Disambiguation +8. Disambiguation ================= This is duff, the Unix command-line utility, and not DUFF, the Windows program. If you wish to find duplicate files on Windows, use DUFF. -10. Release history +9. Release history =================== -Version 0.1 was never released anywhere. +Version 0.1 was named `duplicate', and was never released anywhere. -Version 0.2 was the first release named duff. It was only released to a few -individuals, during the first half of 2005. +Version 0.2 was the first release named duff. It lacked a real checksumming +algorithm, and was thus only released to a few individuals, during the first +half of 2005. -Version 0.3 was the first official release, on November 22, 2005. +Version 0.3 was the first official release, on November 22, 2005, after a +prolonged search for a suitably licensed implementation of SHA1. -Version 0.3.1 was a bugfix release, on November 27, 2005. +Version 0.3.1 was a bugfix release, on November 27, 2005, adding a single +feature (-z), which just happened to get included. 
+ +Version 0.4 was the second feature release, on January 13, 2006, adding a +number of missing and/or requested features as well as bug fixes. It was the +first release to be considered stable and safe enough for everyday use. diff --git a/TODO b/TODO index 7f6aabe..a8a71f1 100644 --- a/TODO +++ b/TODO @@ -1,20 +1,19 @@ -duff - Duplicate file finder -Copyright (c) 2006 Camilla Berglund +List of things to do in duff +============================ Functionality * Change to getopt_long and add long options. +* Implement i18n through gettext. * Detect duplicate file arguments? * Detect duplicate directory subtrees? -* Implement i18n through gettext. Documentation * Add ALGORITHM section (or similar) to manpage. -* Improve formatting of manpage. * Add more examples to manpage. Optimisation -* Make directory cache less stupid/slow. +* Make directory cache less stupid and slow. +* Do smart stuff to make things go faster. * Change linear lists into hash tables or trees? * Compare contents directly for clusters of two in excess mode? -* Do smart stuff to make things go faster. diff --git a/duff.c b/duff.c index 83283cc..6adfba4 100644 --- a/duff.c +++ b/duff.c @@ -1,6 +1,6 @@ /* * duff - Duplicate file finder - * Copyright (c) 2006 Camilla Berglund + * Copyright (c) 2005 Camilla Berglund * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any @@ -83,40 +83,40 @@ * directories. The different modes are defined in duff.h. */ int follow_links_mode = NO_SYMLINKS; -/* The 'all files' flag. Includes dotfiles when searching recursively. +/* The 'all files' flag. Includes dotfiles when searching recursively. */ int all_files_flag = 0; -/* The 'verbose' flag. Makes the program verbose. +/* The 'verbose' flag. Makes the program verbose. */ int verbose_flag = 0; -/* The 'recursive' flag. Recurses into all specified directories. +/* The 'recursive' flag. Recurses into all specified directories. 
*/ int recursive_flag = 0; -/* The 'shut up' flag. Makes the program not complain about skipped +/* The 'shut up' flag. Makes the program not complain about skipped * non-files. */ int quiet_flag = 0; -/* The 'physical mode' flag. Makes the program consider entries being +/* The 'physical mode' flag. Makes the program consider entries being * physical files instead of hard links. */ int physical_flag = 0; -/* The 'excess' flag. For each duplicate cluster, reports all but one. +/* The 'excess' flag. For each duplicate cluster, reports all but one. * Useful for `xargs rm'. */ int excess_flag = 0; -/* The 'paranoid' flag. Makes the program distrust checksums, forcing +/* The 'paranoid' flag. Makes the program distrust checksums, forcing * byte-by-byte comparisons. */ int thorough_flag = 0; -/* The ignore empty files' flag. Makes the program not report empty +/* The 'ignore empty files' flag. Makes the program not report empty * files as duplicates. */ int ignore_empty_flag = 0; -/* The 'header format' value. Specifies the look of the cluster header. +/* The 'header format' value. Specifies the look of the cluster header. * If set to the empty string, no headers are printed. */ const char* header_format = DEFAULT_HEADER_FORMAT; -/* The 'sample limit' value. Specifies the minimal size of files to be +/* The 'sample limit' value. Specifies the minimal size of files to be * compared with the sampling method. */ off_t sample_limit = DEFAULT_SIZE_LIMIT; @@ -138,8 +138,8 @@ static void version(void) } /* Prints brief help information to stderr. - * It is a good idea to keep this synchronised with the actual code. - * It is also a good idea it keep it synchronised with the manpage. + * Note that it is a good idea to keep this synchronised with the actual code. + * Note that it is also a good idea to keep it synchronised with the manpage. 
*/ static void usage(void) { diff --git a/duff.h b/duff.h index d3d156a..324b1f1 100644 --- a/duff.h +++ b/duff.h @@ -1,6 +1,6 @@ /* * duff - Duplicate file finder - * Copyright (c) 2006 Camilla Berglund + * Copyright (c) 2005 Camilla Berglund * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any @@ -79,7 +79,7 @@ struct Entry uint8_t* samples; }; -/* These live in duffentry.c */ +/* These are defined and documented in duffentry.c */ struct Entry* make_entry(const char* path, const struct stat* sb); void link_entry(struct Entry** head, struct Entry* entry); void unlink_entry(struct Entry** head, struct Entry* entry); @@ -87,7 +87,7 @@ void free_entry(struct Entry* entry); void free_entry_list(struct Entry** entries); int compare_entries(struct Entry* first, struct Entry* second); -/* These live in duffutil.c */ +/* These are defined and documented in duffutil.c */ void error(const char* format, ...); void warning(const char* format, ...); const char* get_mode_name(int mode); @@ -97,7 +97,7 @@ void print_cluster_header(const char* format, off_t size, const uint8_t* checksum); -/* These live in duffdriver.c */ +/* These are defined and documented in duffdriver.c */ void process_path(const char* path, int depth); void report_clusters(void);