#conform.py (version 0.1, partly tested only)
#smart directory copying in Python (runs on Python 2.6+ and Python 3)
#
#This solves the following problem:
# there are two directories, Master and Slave, presumably on different drives.
# Slave is meant to be a copy of Master and remain so. They both contain huge
# files (for example bitmaps).
# From time to time Master is reorganised (files are moved), some new files
# are added, and files may even be deleted.
#
#File names (without the directory part) are supposed to be unique in general:
# two files should not bear the same name in different subdirectories, unless
# they are part of a list of exceptions.
# Exceptions are due to a common phenomenon: readme.txt files and the like
# might have the same name in lots of subdirectories.
# The list of exceptions is passed to conform() (see the call at the bottom).
# NOTE: files on the exception list are ignored altogether by this script:
# they are neither copied, moved nor deleted.
#
#This script maintains the conformity of Master and Slave.
#How does it work?
# 1. A dictionary of the files in Master is created. The keys are file names,
#    the values are directories relative to the root.
#    Errors can be found: duplicate files. All duplicates are listed and the
#    process stops before anything is modified.
# 2. A dictionary of the files in Slave is created. Same structure, same
#    duplicate check.
# 3. Directories in Master that are not in Slave are created in Slave.
# 4. Files are moved within Slave to make it look like the structure in Master;
#    files that no longer exist in Master are removed from Slave.
# 5. New files (and files that are newer in Master) are copied into Slave.
# 6. Old directories are removed from Slave.

import os.path
import os
import shutil
import stat
import sys

dbg = 1

# Every character that may separate path components on this platform.
_SEPARATORS = os.sep + (os.altsep or "")


def _relative_part(path, root_len):
    """Return `path` without its first `root_len` characters (the root).

    Leading separators are stripped from the remainder, so the result is the
    same whether or not the root was written with a trailing separator.
    The root itself maps to "".
    """
    return path[root_len:].lstrip(_SEPARATORS)


def _walk(top, visit, arg, topdown=True):
    """Call visit(arg, dirname, names) for every directory at or below `top`.

    Replacement for os.path.walk, which does not exist in Python 3.  `names`
    lists the sub-directories of `dirname` followed by its other entries.
    With topdown=False a directory is visited after all its sub-directories.
    A missing `top` results in no call at all.
    """
    for dirname, subdirs, files in os.walk(top, topdown=topdown):
        visit(arg, dirname, subdirs + files)


def _ensure_dir(path):
    """Create directory `path` (and missing parents) unless it exists."""
    if not os.path.isdir(path):
        os.makedirs(path)


#returns an int
def file_date(filepath):
    """Return the last-modification time of `filepath` as integer seconds."""
    return os.stat(filepath)[stat.ST_MTIME]


def copy_file(from_file, to):
    """Copy the contents of `from_file` to `to`, overwriting `to`.

    Only the data is copied, not the timestamps, so the copy's modification
    time is the time of the copy.  shutil.copyfile closes both files even
    when the copy fails and refuses to copy a file onto itself.
    """
    shutil.copyfile(from_file, to)


class path_info(object):
    """Index of the files found below one root directory.

    Attributes:
      name              -- label of the tree ("master" or "slave")
      root              -- root directory as given by the caller
      rootlen           -- len(root)
      files_dir_dict    -- basename -> relative directory; holds one
                           directory per basename (the last one added)
      duplicates        -- basename -> list of all relative directories,
                           only for basenames added more than once
      has_duplicates    -- true once any basename was added twice
      exceptional_files -- basenames that may appear in lots of subdirs
    """

    def __init__(self, name, path, exceptional_files):
        self.name = name
        self.root = path
        self.rootlen = len(path)
        self.files_dir_dict = {}
        self.duplicates = {}
        self.has_duplicates = False
        self.exceptional_files = exceptional_files

    def add_file(self, basename, rel_dir):
        """Record that `basename` lives in `rel_dir`, tracking duplicates."""
        known = self.files_dir_dict
        if basename in known:
            self.has_duplicates = True
            if basename in self.duplicates:
                self.duplicates[basename].append(rel_dir)
            else:
                # first clash: remember the earlier directory as well
                self.duplicates[basename] = [known[basename], rel_dir]
        known[basename] = rel_dir


exceptional_files = ["readme.txt"]#just a sample list


#used as a "visit" function for _walk
def dir_add_dict(arg, dirname, names):
    """Add the regular files of `dirname` to `arg`, a path_info instance.

    Basenames listed in arg.exceptional_files are skipped.
    """
    relative_dir = _relative_part(dirname, arg.rootlen)
    for basename in names:
        if basename in arg.exceptional_files:
            continue
        if os.path.isfile(os.path.join(dirname, basename)):
            arg.add_file(basename, relative_dir)


def _scan_tree(name, rootpath, exceptions):
    """Return a path_info filled with every file found below `rootpath`."""
    info = path_info(name, rootpath, exceptions)
    _walk(rootpath, dir_add_dict, info)
    return info


def build_dic_master(rootpath, exceptions):
    """Index the master tree.

    Returns the path_info, or None after printing the list of duplicated
    file names when the tree contains duplicates.
    """
    master_info = _scan_tree("master", rootpath, exceptions)
    # truthiness test: comparing keys() with [] is always unequal on Python 3
    if not master_info.duplicates:
        return master_info
    print("Duplicates in master")
    for basename in sorted(master_info.duplicates):
        print("file: %s" % basename)
        print("found in these directories:")
        for rel_dir in master_info.duplicates[basename]:
            sys.stdout.write('"%s" ' % rel_dir)
        print("")
    return None


def build_dic_slave(rootpath, exceptions):
    """Index the slave tree.

    Returns the path_info, or None after printing the list of duplicated
    file names when the tree contains duplicates.
    """
    slave_info = _scan_tree("slave", rootpath, exceptions)
    if not slave_info.duplicates:
        return slave_info
    print("Duplicates in slave")
    for basename in sorted(slave_info.duplicates):
        print(basename)
        for rel_dir in slave_info.duplicates[basename]:
            sys.stdout.write("%s " % rel_dir)
        print("")
    return None


#used as a "visit" function for _walk
def create_subdirs(arg, curr_indir, entries):
    """Re-create the sub-directories of `curr_indir` in the output tree.

    arg is the tuple (out_rootpath, in_rootpath_len); in_rootpath_len is the
    length of the input root that `curr_indir` starts with.
    """
    (out_rootpath, in_rootpath_len) = arg
    rel_dir = _relative_part(curr_indir, in_rootpath_len)
    for entry in entries:
        # os.path.join instead of a hard-coded "\\" so this also works
        # outside Windows
        if os.path.isdir(os.path.join(curr_indir, entry)):
            out_dir = os.path.join(out_rootpath, rel_dir, entry)
            if not os.path.exists(out_dir):
                os.mkdir(out_dir)


def copy_directory_structure(in_rootpath, out_rootpath):
    """Create in `out_rootpath` every directory that exists in `in_rootpath`.

    `out_rootpath` itself is created when missing (and `in_rootpath` exists).
    No file is copied.
    """
    if os.path.isdir(in_rootpath):
        _ensure_dir(out_rootpath)
    _walk(in_rootpath, create_subdirs, (out_rootpath, len(in_rootpath)))


def move_files(master_info, slave_info):
    """Make the files of the slave tree match those of the master tree.

    Both arguments are path_info instances.  Slave files are moved to the
    directory they have in master, slave files unknown to master are deleted,
    then files that are missing from the slave or older there are copied.
    """
    dict_master = master_info.files_dir_dict
    dict_slave = slave_info.files_dir_dict

    #first move/delete files in slave
    for basename in dict_slave:
        rel_dir_slave = dict_slave[basename]
        old_file = os.path.join(slave_info.root, rel_dir_slave, basename)
        if basename not in dict_master:
            # NOTE: rather brutal, are you sure? you could create a batch of
            # files to remove, or you could put them aside....
            os.remove(old_file)
            continue
        rel_dir_master = dict_master[basename]
        if rel_dir_master == rel_dir_slave:
            continue  # already where it belongs
        new_dir = os.path.join(slave_info.root, rel_dir_master)
        _ensure_dir(new_dir)
        os.rename(old_file, os.path.join(new_dir, basename))

    #now copy new files from master
    for basename in dict_master:
        rel_dir = dict_master[basename]
        full_path_master = os.path.join(master_info.root, rel_dir, basename)
        slave_dir = os.path.join(slave_info.root, rel_dir)
        full_path_slave = os.path.join(slave_dir, basename)
        if not os.path.exists(full_path_slave):
            #not in slave: copy new
            _ensure_dir(slave_dir)
            copy_file(full_path_master, full_path_slave)
        elif file_date(full_path_slave) < file_date(full_path_master):
            #older in slave: copy over
            copy_file(full_path_master, full_path_slave)


#used as a "visit" function for _walk (bottom-up)
def remove_excess_slave_dirs1(arg, curr_slavedir, entries):
    """Remove the sub-directories of `curr_slavedir` that master lacks.

    arg is the tuple (master_rootpath, slave_rootpath_len); the second item
    is the length of the slave root that `curr_slavedir` starts with.
    A directory that cannot be removed (for example because it still holds
    files) is reported and left in place.
    """
    (master_rootpath, slave_rootpath_len) = arg
    rel_dir = _relative_part(curr_slavedir, slave_rootpath_len)
    for entry in entries:
        # test the entry inside curr_slavedir, not relative to the cwd
        slave_dir = os.path.join(curr_slavedir, entry)
        if not os.path.isdir(slave_dir):
            continue
        if os.path.exists(os.path.join(master_rootpath, rel_dir, entry)):
            continue
        try:
            os.rmdir(slave_dir)
        except OSError as err:
            print('Could not remove directory "%s": %s' % (slave_dir, err))


def remove_excess_slave_dirs(master_rootpath, slave_rootpath):
    """Remove from the slave tree the directories that master does not have.

    Nothing is done when `master_rootpath` is not a directory.
    """
    if not os.path.isdir(master_rootpath):
        return
    # bottom-up, so that an obsolete directory is emptied of its obsolete
    # sub-directories before its own removal is attempted
    _walk(slave_rootpath, remove_excess_slave_dirs1,
          (master_rootpath, len(slave_rootpath)), topdown=False)


#top level call
def conform(master_rootpath, slave_rootpath, exceptions):
    """Make the tree at `slave_rootpath` conform to `master_rootpath`.

    `exceptions` lists the basenames that may occur in several directories;
    those files are ignored.  Nothing is modified when the master directory
    is missing or when either tree contains duplicate file names (the
    duplicates are printed).  Returns None.
    """
    if not os.path.isdir(master_rootpath):
        # an absent master would otherwise look like an empty one and every
        # tracked file of the slave would be deleted
        print('Master directory "%s" not found, nothing done'
              % master_rootpath)
        return
    # scan both trees before touching the slave
    master_info = build_dic_master(master_rootpath, exceptions)
    if master_info is None:
        return
    slave_info = build_dic_slave(slave_rootpath, exceptions)
    if slave_info is None:
        return
    copy_directory_structure(master_rootpath, slave_rootpath)
    # NOTE: the original author remarked "bug shows up here..." on this call
    # without further detail
    move_files(master_info, slave_info)
    remove_excess_slave_dirs(master_rootpath, slave_rootpath)


if __name__ == "__main__":
    #modify this
    conform(r"c:\hdef\test\master", r"c:\hdef\test\slave", ["readme.txt"])
    print("finished processing")