V2EX = way to explore
V2EX 是一个关于分享和探索的地方
现在注册
已注册用户请  登录
推荐学习书目
Learn Python the Hard Way
Python Sites
PyPI - Python Package Index
http://diveintopython.org/toc/index.html
Pocoo
值得关注的项目
PyPy
Celery
Jinja2
Read the Docs
gevent
pyenv
virtualenv
Stackless Python
Beautiful Soup
结巴中文分词
Green Unicorn
Sentry
Shovel
Pyflakes
pytest
Python 编程
pep8 Checker
Styles
PEP 8
Google Python Style Guide
Code Style from The Hitchhiker's Guide
charlescwj
V2EX  ›  Python

求助:多进程调用百度地图 API 获取数据,比单进程慢,为什么啊?(附代码)

  •  
  •   charlescwj · 2018-05-24 13:41:16 +08:00 · 2188 次点击
    这是一个创建于 2401 天前的主题,其中的信息可能已经有所发展或是发生改变。

    多进程调用百度地图 api,先获取经纬度,然后利用经纬度获取过路费,保存到 excel (获取一条存一条),因为想加快速度,所以写了多进程,但是经过测试,多进程比单进程还要慢,想请教下为什么啊?附代码(我的 ak 隐藏了):

    • 单进程代码:
    import requests
    from openpyxl import load_workbook
    import time
    
    # 获取经纬度
    def geocode(address):
        base = url = "http://api.map.baidu.com/geocoder/v2/?address=" + address + "&output=json&ak="
        response = requests.get(base)
        answer = response.json()
        return answer['result']['location']
    
    # 获取过路费
    def get(origin_lat,origin_lng,destination_lat,destination_lng):
        base = url = "http://api.map.baidu.com/direction/v2/driving?origin=" + str(origin_lng) + "," + str(origin_lat) +  "&destination=" \
                     + str(destination_lng)+","+str(destination_lat) + "&output=json&ak="
        response = requests.get(base)
        answer = response.json()
        info = [answer['result']['routes'][0]['duration']/60,answer['result']['routes'][0]['distance']/1000,answer['result']['routes'][0]['toll']]
        return info
    
    if __name__=='__main__':
        start = time.clock()
        data = load_workbook(r"ODdata.xlsx")
        table = data.get_sheet_by_name('locationcode')
        nrows = table.max_row
        ncols = table.max_column
    
        origin_table = data.get_sheet_by_name('OD')
        origin_nrows = origin_table.max_row
        origin_ncols = origin_table.max_column
        go_outset = []
        go_destination = []
        for r in range(2,nrows+1):
            go_outset.append(table.cell(row=r,column=2).value) #生成去程出发地列表
        for r in range(2,nrows+1):
            go_destination.append(table.cell(row=r,column=5).value) #生成去程目的地列表
    
        go_outset_count=1
        go_destination_count=1
        go_outset_locationcode = []
        for i in go_outset:
            try:
                go_outset_locationcode.append(geocode(i)) #生成去程出发地经纬度
                print("出发地经纬度查询计数%d"%go_outset_count)
                go_outset_count+=1
            except:
                go_outset_locationcode.append({'lat':'wrong','lng':'wrong'})
    
        go_destination_locationcode = []
        for i in go_destination:
            try:
                go_destination_locationcode.append(geocode(i))#生成去程目的地经纬度
                print("目的地经纬度查询计数%d" % go_destination_count)
                go_destination_count+=1
            except:
                go_destination_locationcode.append({'lat':'wrong','lng':'wrong'})
    
        go_outset_locationcodelist = []
        go_destination_locationcodelist = []
    
        for i in range(len(go_outset_locationcode)):
            go_outset_locationcodelist.append(go_outset_locationcode[i].values())
    
        for i in range(len(go_destination_locationcode)):
            go_destination_locationcodelist.append(go_destination_locationcode[i].values())
    
        #将经纬度和省份写入 excel
        for i in range(2,nrows+1):
            for j in range(3,5):
                _ = table.cell(column=j, row=i, value=list(go_outset_locationcodelist[i-2])[j-3])
        for i in range(2,nrows+1):
            for j in range(6,8):
                _ = table.cell(column=j, row=i, value=list(go_destination_locationcodelist[i-2])[j-6])
        data.save(r"ODdata.xlsx")
    
        #获取过路费
        info = []
        go_count=1
        for i in range(0,len(go_outset)):
            if list(go_outset_locationcodelist[i])[0]=='wrong':
                continue
            else:
                try:
                    info.append(get(list(go_outset_locationcodelist[i])[0],list(go_outset_locationcodelist[i])[1],list(go_destination_locationcodelist[i])[0],list(go_destination_locationcodelist[i])[1]))
                    print("过路费查询计数%d" % go_count)
                    go_count+=1
                except:
                    info.append(['wrong','wrong','wrong'])
                    print("错误行数是%d"%i)
                finally:
                    for j in range(8,11):
                        _ = origin_table.cell(column=j, row=i+3, value=info[i][j - 8])
                    data.save(r"ODdata.xlsx")
    
        elapsed = (time.clock() - start)
        print("Time used:", elapsed)
    
    • 多进程
    import requests
    from openpyxl import load_workbook
    import multiprocessing
    from multiprocessing import Lock,Pool
    import time
    
    
    # 获取经纬度
    def geocode(address):
        base = url = "http://api.map.baidu.com/geocoder/v2/?address=" + address + "&output=json&ak="
        response = requests.get(base)
        answer = response.json()
        return answer['result']['location']
    
    # 保存到 excel
    def save(info):
        # data_new = load_workbook(r"ODdata.xlsx")
        # origin_table_new = data_new['OD']
        for j in range(8, 11):
            _ = origin_table.cell(column=j, row=i + 3, value=info[j-8])
        data.save(r"ODdata.xlsx")
        print("第%d 行保存成功" % (i + 1))
    
    # 获取过路费
    def getall(i,origin_lat,origin_lng,destination_lat,destination_lng):
        try:
            base = url = "http://api.map.baidu.com/direction/v2/driving?origin=" + str(origin_lng) + "," + str(
                origin_lat) + "&destination=" \
                         + str(destination_lng) + "," + str(
                destination_lat) + "&output=json&ak="
            response = requests.get(base)
            answer = response.json()
            info = [answer['result']['routes'][0]['duration'] / 60, answer['result']['routes'][0]['distance'] / 1000,
                    answer['result']['routes'][0]['toll']]
            print("过路费查询成功,第%d 行" % (i+1))
        except:
            info=['wrong', 'wrong', 'wrong']
            print("过路费查询失败,第%d 行" % (i+1))
        # finally:
        #     for j in range(8, 11):
        #         _ = origin_table.cell(column=j, row=i + 3, value=info[j - 8])
        #     data.save(r"ODdata.xlsx")
        #     print("第%d 行保存成功" % (i + 1))
        return info
    
    
    
    if __name__=='__main__':
        start = time.clock()
    
        data = load_workbook(r"ODdata.xlsx")
        table = data['locationcode']
        nrows = table.max_row
        ncols = table.max_column
    
        origin_table = data['OD']
        origin_nrows = origin_table.max_row
        origin_ncols = origin_table.max_column
        go_outset = []
        go_destination = []
        for r in range(2,nrows+1):
            go_outset.append(table.cell(row=r,column=2).value) #生成去程出发地列表
        for r in range(2,nrows+1):
            go_destination.append(table.cell(row=r,column=5).value) #生成去程目的地列表
    
        go_outset_count=1
        go_destination_count=1
        go_outset_locationcode = []
        for i in go_outset:
            try:
                go_outset_locationcode.append(geocode(i)) #生成去程出发地经纬度
                print("出发地经纬度查询计数%d"%go_outset_count)
                go_outset_count+=1
            except:
                go_outset_locationcode.append({'lat':'wrong','lng':'wrong'})
    
        go_destination_locationcode = []
        for i in go_destination:
            try:
                go_destination_locationcode.append(geocode(i))#生成去程目的地经纬度
                print("目的地经纬度查询计数%d" % go_destination_count)
                go_destination_count+=1
            except:
                go_destination_locationcode.append({'lat':'wrong','lng':'wrong'})
    
        go_outset_locationcodelist = []
        go_destination_locationcodelist = []
    
        for i in range(len(go_outset_locationcode)):
            go_outset_locationcodelist.append(go_outset_locationcode[i].values())
    
        for i in range(len(go_destination_locationcode)):
            go_destination_locationcodelist.append(go_destination_locationcode[i].values())
    
        #将经纬度和省份写入 excel
        for i in range(2,nrows+1):
            for j in range(3,5):
                _ = table.cell(column=j, row=i, value=list(go_outset_locationcodelist[i-2])[j-3])
        for i in range(2,nrows+1):
            for j in range(6,8):
                _ = table.cell(column=j, row=i, value=list(go_destination_locationcodelist[i-2])[j-6])
        data.save(r"ODdata.xlsx")
    
        #开启多进程,获取过路费
        for i in range(0,len(go_outset)):
            if list(go_outset_locationcodelist[i])[0]=='wrong':
                continue
            else:
                pool = multiprocessing.Pool(processes=5)
                pool.apply_async(getall,(i,list(go_outset_locationcodelist[i])[0], list(go_outset_locationcodelist[i])[1],list(go_destination_locationcodelist[i])[0], list(go_destination_locationcodelist[i])[1],),callback=save)
                # pool.apply(getall, (i, list(go_outset_locationcodelist[i])[0], list(go_outset_locationcodelist[i])[1],
                #                           list(go_destination_locationcodelist[i])[0],
                #                           list(go_destination_locationcodelist[i])[1],))
                # p = multiprocessing.Process(target=save, args=(i,list(go_outset_locationcodelist[i])[0], list(go_outset_locationcodelist[i])[1],
                #                 list(go_destination_locationcodelist[i])[0], list(go_destination_locationcodelist[i])[1],lock))
                # p.start()
                pool.close()
                pool.join()
    
        elapsed = (time.clock() - start)
        print("Time used:", elapsed)
    
    
    3 条回复    2018-05-24 19:18:14 +08:00
    charlescwj
        1
    charlescwj  
    OP
       2018-05-24 13:55:27 +08:00
    @John60676 大神 能帮我看一下这个问题吗?
    charlescwj
        2
    charlescwj  
    OP
       2018-05-24 17:32:03 +08:00
    自己来回复一下:明白问题出在哪里了 就是 我在创建和阻塞进程池的时候 卸载了循环内 应该卸载循环外才对
    charlescwj
        3
    charlescwj  
    OP
       2018-05-24 19:18:14 +08:00 via Android
    写在
    关于   ·   帮助文档   ·   博客   ·   API   ·   FAQ   ·   实用小工具   ·   3046 人在线   最高记录 6679   ·     Select Language
    创意工作者们的社区
    World is powered by solitude
    VERSION: 3.9.8.5 · 25ms · UTC 14:03 · PVG 22:03 · LAX 06:03 · JFK 09:03
    Developed with CodeLauncher
    ♥ Do have faith in what you're doing.