V2EX = way to explore
V2EX 是一个关于分享和探索的地方
现在注册
已注册用户请  登录
推荐学习书目
Learn Python the Hard Way
Python Sites
PyPI - Python Package Index
http://diveintopython.org/toc/index.html
Pocoo
值得关注的项目
PyPy
Celery
Jinja2
Read the Docs
gevent
pyenv
virtualenv
Stackless Python
Beautiful Soup
结巴中文分词
Green Unicorn
Sentry
Shovel
Pyflakes
pytest
Python 编程
pep8 Checker
Styles
PEP 8
Google Python Style Guide
Code Style from The Hitchhiker's Guide
heqingpan
V2EX  ›  Python

基于七牛 Python SDK 写的一个同步脚本(支持批量下载)

  •  1
     
  •   heqingpan · 2015-08-28 10:33:33 +08:00 · 2774 次点击
    这是一个创建于 3375 天前的主题,其中的信息可能已经有所发展或是发生改变。

    代码不是很多就贴出来吧

    #!/usr/bin/env python
    #-*- coding:utf-8 -*-
    # 
    # AUTHOR = "heqingpan"
    # AUTHOR_EMAIL = "[email protected]"
    # URL = "http://git.oschina.net/hqp/qiniu_sync"
    
    import qiniu
    from qiniu import Auth
    from qiniu import BucketManager
    import os
    import re
    import urllib2
    import sys
    
    # Qiniu credentials and bucket settings -- fill these in before running.
    access_key = ''
    secret_key = ''
    bucket_name = ''
    bucket_domain = ''

    # Shared SDK objects: `q` issues upload tokens and private download URLs,
    # `bucket` lists the bucket and runs batch operations.
    q = Auth (access_key, secret_key )
    bucket = BucketManager (q )

    # Directory to synchronise. Defaults to the directory holding this script;
    # assign another path to `basedir` here to sync a different directory.
    basedir=os.path.realpath (os.path.dirname (__file__))

    # Local files are compared with ignore_paths by their path relative to
    # basedir, so the script must be listed by base name. A raw __file__ such as
    # "./qiniusync.py" or an absolute path would never match, and the script
    # (including the keys above) would be uploaded.
    filename=os.path.basename (__file__)
    ignore_paths=[filename,"{0}c".format (filename )]
    # File or directory names that are never synchronised.
    ignore_names=[".DS_Store",".git",".gitignore"]
    # Encoding used to convert keys between byte strings and unicode.
    charset="utf8"
    # Tolerance, in seconds, when comparing remote upload time with local time.
    diff_time=2*60
    
    
    def list_all(bucket_name, bucket=None, prefix="", limit=100):
        """Return the keys of every object listed for ``bucket_name``.

        Pages through ``bucket.list`` ``limit`` entries at a time, feeding the
        marker from each response into the next request, until the listing
        reports end-of-file.

        Args:
            bucket_name: Name of the bucket to list.
            bucket: Object exposing ``list(bucket_name, prefix=, marker=,
                limit=)`` that returns ``(ret, eof, info)``. A new
                ``BucketManager(q)`` is created when omitted.
            prefix: Forwarded to ``bucket.list`` as the key prefix.
            limit: Page size forwarded to ``bucket.list``.

        Returns:
            A list of keys in the order the pages returned them.

        Raises:
            IOError: If a list request comes back without a result body.
        """
        if bucket is None:
            bucket = BucketManager(q)
        keys = []
        marker = None
        eof = False
        while eof is False:
            ret, eof, info = bucket.list(bucket_name, prefix=prefix, marker=marker, limit=limit)
            if ret is None:
                # Fail loudly instead of crashing on ``None.get`` below.
                raise IOError("failed to list bucket %r: %s" % (bucket_name, info))
            marker = ret.get('marker', None)
            keys.extend(item["key"] for item in ret['items'])
        return keys
    
    def get_files(basedir="", fix="", rlist=None, ignore_paths=None, ignore_names=None):
        """Recursively collect the files under ``basedir``.

        Args:
            basedir: Directory to scan; an empty string means the current
                directory.
            fix: Relative-path prefix prepended to every entry found in
                ``basedir`` (used by the recursion to build nested paths).
            rlist: Optional list to append results to; a new list is created
                when omitted.
            ignore_paths: Relative paths (``fix`` joined with the entry name) to
                skip.
            ignore_names: Entry names to skip at any depth, files and
                directories alike.

        Returns:
            ``rlist`` with the relative path of every regular file that was not
            ignored.
        """
        if rlist is None:
            rlist = []
        ignore_paths = () if ignore_paths is None else ignore_paths
        ignore_names = () if ignore_names is None else ignore_names
        # os.listdir("") raises, so treat an empty basedir as the current dir.
        for subfile in os.listdir(basedir or os.curdir):
            # Names are matched on the bare entry name so that e.g. ".DS_Store"
            # is skipped inside subdirectories too, not only at the top level.
            if subfile in ignore_names:
                continue
            rel_path = os.path.join(fix, subfile)
            if rel_path in ignore_paths:
                continue
            full_path = os.path.join(basedir, subfile)
            if os.path.isfile(full_path):
                rlist.append(rel_path)
            elif os.path.isdir(full_path):
                get_files(full_path, rel_path, rlist, ignore_paths, ignore_names)
        return rlist
    
    def get_valid_key_files(subdir=""):
        """Return ``(key, relative_path)`` pairs for the files to synchronise.

        Args:
            subdir: Directory to scan; the module-level ``basedir`` is used when
                it is empty.

        Returns:
            A list of tuples. ``relative_path`` is the path produced by
            ``get_files``; ``key`` is the same path with backslashes replaced by
            forward slashes.
        """
        # Use a separate local name: assigning to ``basedir`` inside the function
        # would make it local and raise UnboundLocalError when subdir is empty.
        root = subdir or basedir
        files = get_files(basedir=root, ignore_paths=ignore_paths, ignore_names=ignore_names)
        return [(f.replace("\\", "/"), f) for f in files]
    
    
    def sync():
        """Push the local tree under ``basedir`` to the bucket.

        Files whose key is not in the bucket listing are uploaded immediately.
        Keys that already exist are collected and handed to ``update_file`` in
        batches, which decides whether each one needs re-uploading.
        """
        remote_keys = set(list_all(bucket_name, bucket))
        key_to_file = {}
        pending = []       # existing keys waiting for the next update_file batch
        batch_limit = 500
        counter = 0
        for key, path in get_valid_key_files(basedir):
            key_to_file[key] = path
            # The remote listing is compared against the decoded form of byte keys.
            lookup_key = key.decode(charset) if isinstance(key, str) else key
            if lookup_key not in remote_keys:
                upload_file(lookup_key, os.path.join(basedir, path))
                continue
            pending.append(key)
            counter += 1
            if counter > batch_limit:
                counter -= batch_limit
                update_file(key_to_file, pending)
                pending = []
        if pending:
            update_file(key_to_file, pending)
        print("sync end")
    
    def update_file(k2f, ulist):
        """Re-upload the files in ``ulist`` whose remote copy is out of date.

        Runs one batched stat request for ``ulist`` and compares every result
        with the local file. A file is re-uploaded when its size differs from
        the remote ``fsize`` and the remote ``putTime`` is older than the local
        modification time by more than ``diff_time`` seconds. Entries whose stat
        result carries no ``putTime`` are re-uploaded as well.

        Args:
            k2f: Mapping from key to file path relative to ``basedir``.
            ulist: Keys to check; every key must be present in ``k2f``.

        Raises:
            IOError: If the batch request returns no results.
        """
        ops = qiniu.build_batch_stat(bucket_name, ulist)
        rets, infos = bucket.batch(ops)
        if rets is None:
            raise IOError("batch stat failed: %s" % (infos,))
        for key, item in zip(ulist, rets):
            # Resolve against basedir so the check also works when the current
            # working directory is not the sync root.
            local_path = os.path.join(basedir, k2f[key])
            stat = item.get("data") or {}
            raw_put_time = stat.get("putTime")
            if raw_put_time is not None:
                if os.path.getsize(local_path) == stat.get("fsize"):
                    continue
                # putTime is scaled down by 10**7 before being compared with
                # seconds -- presumably 100-nanosecond units; confirm with the API docs.
                put_time = int(raw_put_time / 10000000)
                # Compare with the modification time: access time changes on
                # every read and says nothing about the file's content.
                local_time = int(os.path.getmtime(local_path))
                if put_time >= local_time - diff_time:
                    # Remote copy is recent enough.
                    continue
            upload_file(key, local_path)
    
    def upload_file(key, localfile):
        """Upload ``localfile`` to the configured bucket under ``key``.

        Prints the key being uploaded. The result of the upload call is not
        inspected.
        """
        print("upload_file:")
        print(key)

        def report_progress(progress, total):
            # Progress callback handed to put_file; it just returns the value.
            return progress

        upload_token = q.upload_token(bucket_name, key)
        content_type = get_mime_type(localfile)
        extra_params = {'x:a': 'a'}
        qiniu.put_file(upload_token, key, localfile, extra_params, content_type,
                       progress_handler=report_progress)
    
    def get_mime_type(path):
        """Return the MIME type to upload ``path`` with.

        The type is guessed from the file name's extension; names with no
        recognised extension fall back to ``"text/plain"``.

        Args:
            path: File name or path; only its name is inspected, the file is
                not opened.

        Returns:
            A MIME type string such as ``"image/png"``.
        """
        import mimetypes

        guessed, _encoding = mimetypes.guess_type(path)
        return guessed or "text/plain"
    
    def down_file(key, basedir="", is_private=1, expires=3600):
        """Download the object stored under ``key`` into ``basedir``.

        The object is fetched from ``http://<bucket_domain>/<key>`` and written
        to ``basedir`` joined with ``key`` (with ``/`` mapped to the local path
        separator). Missing parent directories are created and an existing file
        at the target path is replaced.

        Args:
            key: Object key; unicode keys are encoded with ``charset`` first.
            basedir: Directory to save into; an empty string means relative to
                the current directory.
            is_private: When truthy, the URL is signed with
                ``q.private_download_url``.
            expires: Passed to ``q.private_download_url`` as ``expires``.

        NOTE(review): ``key`` is used verbatim to build the local path, so a key
        containing ``..`` would be written outside ``basedir``.
        """
        import contextlib
        import shutil

        if isinstance(key, unicode):
            key = key.encode(charset)
        url = 'http://%s/%s' % (bucket_domain, key)
        if is_private:
            url = q.private_download_url(url, expires=expires)
        savepath = os.path.join(basedir, key.replace("/", os.sep))
        dir_ = os.path.dirname(savepath)
        # closing() guarantees the HTTP response is released even if writing fails.
        with contextlib.closing(urllib2.urlopen(url)) as response:
            # dir_ is empty when the file lands directly in the current
            # directory; os.makedirs("") would raise in that case.
            if dir_ and not os.path.isdir(dir_):
                os.makedirs(dir_)
            elif os.path.isfile(savepath):
                os.remove(savepath)
            with open(savepath, 'wb') as out:
                # Stream in chunks instead of holding the whole object in memory.
                shutil.copyfileobj(response, out)
    
    def down_all(prefix=""):
        """Download every object whose key is listed for ``prefix``.

        Each key is downloaded into ``basedir`` with ``down_file``. A failure on
        one key is reported with its traceback and the loop moves on to the next
        key.

        Args:
            prefix: Key prefix forwarded to ``list_all``.
        """
        import traceback
        for key in list_all(bucket_name, bucket, prefix=prefix):
            try:
                down_file(key, basedir=basedir)
            except Exception:
                # Catch Exception rather than everything, so Ctrl-C
                # (KeyboardInterrupt) and SystemExit still stop the run.
                print("error down:\t"+key)
                print(traceback.format_exc())
            else:
                print("down:\t"+key)
        print("down end")
    
    
    def main():
        """Command-line entry point.

        ``down [prefix]`` downloads the listed objects (optionally limited to a
        key prefix); any other invocation runs an upload sync.
        """
        args = sys.argv[1:]
        if args and args[0] == "down":
            prefix = args[1] if len(args) > 1 else ""
            down_all(prefix=prefix)
            return
        sync()
    
    # Run the command-line entry point only when this file is executed as a
    # script, not when it is imported.
    if __name__=="__main__":
        main ()
    

    使用方式

    • 安装七牛 Python SDK
      pip install qiniu

    • 填写脚本文件(qiniusync.py )的配置信息

      access_key = ''
      secret_key = ''
      bucket_name = ''
      bucket_domain = ''

      注册后可以拿到对应的信息

    • 将脚本文件(qiniusync.py )拷贝到待同步根目录

    • 运行脚本

      python qiniusync.py
      python qiniusync.py down
      python qiniusync.py down [文件路径前缀]

    脚本也有发到 Git@OSC 和 GitHub 上,有兴趣的同学可以去看看。

    1 条回复    2015-08-28 11:40:22 +08:00
    Ouyangan
        1
    Ouyangan  
       2015-08-28 11:40:22 +08:00
    谢谢
    关于   ·   帮助文档   ·   博客   ·   API   ·   FAQ   ·   实用小工具   ·   2572 人在线   最高记录 6679   ·     Select Language
    创意工作者们的社区
    World is powered by solitude
    VERSION: 3.9.8.5 · 25ms · UTC 10:46 · PVG 18:46 · LAX 02:46 · JFK 05:46
    Developed with CodeLauncher
    ♥ Do have faith in what you're doing.