use hash to check if a file content was really modified

Mon, 19 Aug 2019 19:19:08 +0200

author
Olaf Wintermann <olaf.wintermann@gmail.com>
date
Mon, 19 Aug 2019 19:19:08 +0200
changeset 630
046b869a1c49
parent 629
bc2cdbf5e68f
child 631
93bbeb00385c

use hash to check if a file content was really modified

dav/scfg.c file | annotate | diff | comparison | revisions
dav/scfg.h file | annotate | diff | comparison | revisions
dav/sync.c file | annotate | diff | comparison | revisions
--- a/dav/scfg.c	Sun Aug 18 09:41:29 2019 +0200
+++ b/dav/scfg.c	Mon Aug 19 19:19:08 2019 +0200
@@ -393,6 +393,7 @@
     bool lockpull = false;
     bool lockpush = false;
     bool hashing = false;
+    bool store_hash = false;
     //bool detect_copy = false;
     time_t lock_timeout = 0;
     uint32_t metadata = 0;
@@ -490,6 +491,7 @@
                 }
             } else if(xstreq(node->name, "hashing")) {
                 hashing = util_getboolean(value);
+                store_hash = hashing; // TODO: extra config for this?
             } else if(xstreq(node->name, "symlinks")) {
                 uint32_t symlinkconfig = 0;
                 const char *delims = " ,\r\n";
@@ -550,6 +552,7 @@
     dir->lockpull = lockpull;
     dir->lockpush = lockpush;
     dir->hashing = hashing;
+    dir->store_hash = store_hash;
     dir->lock_timeout = lock_timeout;
     dir->metadata = metadata;
     dir->splitconfig = splitconfig;
--- a/dav/scfg.h	Sun Aug 18 09:41:29 2019 +0200
+++ b/dav/scfg.h	Mon Aug 19 19:19:08 2019 +0200
@@ -56,6 +56,9 @@
 #define SYNC_SYMLINK(dir) \
     (((dir)->symlink & SYNC_SYMLINK_SYNC) == SYNC_SYMLINK_SYNC)
     
+#define SYNC_HASHING(dir) ((dir)->hashing)
+#define SYNC_STORE_HASH(dir) ((dir)->store_hash)
+    
 typedef struct TagConfig   TagConfig;
 typedef struct Versioning  Versioning;
 typedef struct SplitConfig SplitConfig;
@@ -82,6 +85,7 @@
     bool lockpull;
     bool lockpush;
     bool hashing;
+    bool store_hash;
     uint32_t db_settings;
 } SyncDirectory;
 
--- a/dav/sync.c	Sun Aug 18 09:41:29 2019 +0200
+++ b/dav/sync.c	Mon Aug 19 19:19:08 2019 +0200
@@ -534,7 +534,7 @@
     remove_deleted_conflicts(dir, db);
     
     UcxMap *hashes = NULL;
-    if(dir->hashing) {
+    if(SYNC_HASHING(dir)) {
         hashes = create_hash_index(db);
     }
     
@@ -742,7 +742,7 @@
         ucx_map_cstr_put(conflicts, res->path, res);
     }
     
-    if(dir->hashing) {
+    if(SYNC_HASHING(dir)) {
         // check for moved/copied files
         UcxList *elm = res_new;
         UcxList *prev = NULL;
@@ -1391,7 +1391,7 @@
         }
     }
     
-    if((issplit || dir->hashing) && !link) {
+    if(issplit || (SYNC_HASHING(dir) && !link)) {
         if(truncate_file >= 0) {
             // only true if issplit is true
             if(truncate(local_path, truncate_file)) {
@@ -1773,7 +1773,7 @@
         curl_easy_setopt(sn->handle, CURLOPT_VERBOSE, 1L);
         curl_easy_setopt(sn->handle, CURLOPT_STDERR, stderr);
     }
-    if(dir->hashing) {
+    if(SYNC_STORE_HASH(dir)) {
         sn->flags |= DAV_SESSION_STORE_HASH;
     }
     
@@ -1819,7 +1819,7 @@
     DavBool remove_file = cmd_getoption(a, "remove") ? 1 : 0;
     
     UcxMap *db_hashes = NULL;
-    if(dir->hashing) {
+    if(SYNC_HASHING(dir)) {
         db_hashes = create_hash_index(db);
     }
     
@@ -1911,7 +1911,7 @@
         }
     }
     
-    if(dir->hashing) {
+    if(SYNC_STORE_HASH(dir)) {
         // calculate hashes of all new files and check if a file
         // was moved or is a copy
         UcxList *elm = ls_new;
@@ -2729,6 +2729,7 @@
             }
         }
         
+        // copy some metadata from db_res, that localscan does not deliver
         res->tags_updated = db_res->tags_updated;
         if(db_res->etag) {
             res->etag = strdup(db_res->etag);
@@ -2743,6 +2744,20 @@
             res->xattr_hash = strdup(db_res->xattr_hash);
         }
         
+        // if the resource is split, move the part info to the new
+        // LocalResource obj, because we need it later
+        if(db_res->parts) {
+            res->parts = db_res->parts;
+            res->numparts = db_res->numparts;
+            db_res->parts = NULL;
+            db_res->numparts = 0;
+        }
+        
+        // check if metadata has changed
+        // metadata are tags, mode, owner, xattr
+        // set res->metadata_updated to 1 in case any metadata has changed
+        
+        // check if tags have changed
         if(dir->tagconfig && dir->tagconfig->detect_changes && !res->tags_updated) {
             UcxBuffer *tags = sync_get_file_tag_data(dir, res);
             if(tags) {
@@ -2761,12 +2776,14 @@
             res->metadata_updated = res->tags_updated;
         }
         
+        // check if mode has changed
         if((dir->metadata & FINFO_MODE) == FINFO_MODE) {
             if(db_res->mode != res->mode) {
                 res->finfo_updated = 1;
                 res->metadata_updated = 1;
             }
         }
+        // check if owner has changed
         if((dir->metadata & FINFO_OWNER) == FINFO_OWNER) {
             if(db_res->uid != res->uid || db_res->gid != res->gid) {
                 res->finfo_updated = 1;
@@ -2774,6 +2791,7 @@
             }
         }
         
+        // check if xattr have changed
         if((dir->metadata & FINFO_XATTR) == FINFO_XATTR) {
             char *path = create_local_path(dir, local_resource_path(db_res));
             XAttributes *xattr = file_get_attributes(path, (xattr_filter_func)xattr_filter, dir);
@@ -2790,23 +2808,37 @@
             }
         }
         
+        // check if the content of the file was modified
+        // in case of links, just check if the link target has changed
+        // for normal files, check last modified and size
+        // or compare content hashes
         if(nullstrcmp(db_res->link_target, res->link_target)) {
             res->link_updated = 1;
-        } else if(
+        } else {
+            if(db_res->hash && res->hash) {
+                // we already have hashes
+                if(!strcmp(db_res->hash, res->hash)) {
+                    return 0; // hashes equal -> file content unchanged
+                }                
+            } else if(
                 db_res->last_modified == res->last_modified &&
                 db_res->size == res->size &&
                 db_res->isdirectory == res->isdirectory)
-        {
-            return 0;
-        }
-        
-        if(db_res->parts) {
-            // if the resource is splitted, move the part infos to the new
-            // LocalResource obj, because we need it later
-            res->parts = db_res->parts;
-            res->numparts = db_res->numparts;
-            db_res->parts = NULL;
-            db_res->numparts = 0;
+            {
+                // mtime and size unchanged, content also likely unchanged
+                return 0;
+            } else if(SYNC_HASHING(dir) && db_res->hash) {
+                // in case of activated hashing, we check if the content
+                // has really changed
+                
+                // res->hash is NULL here (the first branch handles the case where both hashes exist)
+                char *local_path = util_concat_path(dir->path, local_resource_path(res));
+                res->hash = util_file_hash(local_path);
+                free(local_path);
+                if(res->hash && !strcmp(res->hash, db_res->hash)) {
+                    return 0;
+                }
+            }
         }
     } else {
         res->tags_updated = 1;
@@ -3877,7 +3909,7 @@
                 char *etag = dav_get_string_property(up_res, "D:getetag");
                 local_resource_set_etag(local, etag);
                 
-                if(!issplit && dir->hashing) {
+                if(!issplit && SYNC_STORE_HASH(dir)) {
                     if(local->hash) {
                         free(local->hash);
                     }

mercurial