python 淘宝爬虫示例源码(抓取天猫数据)

python 淘宝爬虫示例源码(抓取天猫数据)
资源下载需要积分:2
  • 4360***@qq.com 开通了 永久会员
    3小时前
  • 1314***@qq.com 开通了 钻石会员
    13小时前
  • 1365***@qq.com 开通了 永久会员
    22小时前
  • 6020***@qq.com 开通了 铂金会员
    1天前
  • 6037***@qq.com 开通了 钻石会员
    1天前
  • 9401***@qq.com 开通了 黄金会员
    2天前
  • 4301***@qq.com 开通了 钻石会员
    2天前
  • 7229***@qq.com 开通了 永久会员
    2天前
  • 6028***@qq.com 开通了 永久会员
    3天前
  • 9671***@qq.com 开通了 铂金会员
    3天前
  • 6587***@qq.com 开通了 铂金会员
    3天前
  • 5468***@qq.com 开通了 铂金会员
    3天前
  • 1159***@qq.com 开通了 黄金会员
    3天前
  • 2856***@qq.com 开通了 永久会员
    3天前
  • 3758***@qq.com 开通了 铂金会员
    3天前
  • 4058***@qq.com 开通了 永久会员
    4天前
  • 5211***@qq.com 开通了 永久会员
    4天前
  • 4767***@qq.com 开通了 永久会员
    4天前
  • 源码分类:程序源码
  • 发布日期:2019-05-21 13:05
  • 展示次数:3904
  • 下载次数:1
  • 收藏次数:18
  • 分享人员:本站会员

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Scrapy spider that crawls Taobao / Tmall search results through Splash.

(Original note: 爬取淘宝 天猫网站数据 — "crawl Taobao / Tmall site data".)

This is Python 2 code (``reload(sys)``, ``from urllib import quote`` and the
``urlparse`` module only exist on Python 2).  It drives a Splash rendering
service via the ``splash`` request meta to execute Lua scripts that load the
JavaScript-heavy Taobao search pages, then extracts item titles and shop
names into ``TaobaoItem``.

Fixes applied in review:
  * ``import dateTime``  -> ``import datetime``   (module names are case-sensitive)
  * ``import URLparse``  -> ``import urlparse``   (Python 2 stdlib name)
  * ``sys.setDEFAULTencoding`` -> ``sys.setdefaultencoding`` (the former raises
    AttributeError)
  * ``Meta=`` keyword on ``scrapy.Request`` -> ``meta=`` (keyword arguments are
    case-sensitive; ``Meta=`` raises TypeError at runtime)
  * shebang ``#!/usr/bin/env Python`` -> ``python`` and moved to line 1
  * removed a duplicate ``import scrapy``
  * restored the ``+`` in ``application/xhtml+xml`` (lost to HTML mangling)
"""
import base64
import datetime
import json
import socket
import sys
import urlparse
from urllib import quote, unquote

import scrapy
from scrapy.http import Request
from scrapy.http.headers import Headers
from scrapy.loader import ItemLoader
from scrapy.loader.processors import MapCompose, Join

from taobao.items import TaobaoItem

# Python 2 idiom: re-expose sys.setdefaultencoding (hidden by site.py) so that
# implicit str<->unicode conversions of the Chinese query strings use UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')


class MySpider(scrapy.Spider):
    """Crawl Tmall search result pages rendered by Splash."""

    name = 'tmall2'
    start_urls = ["http://example.com", "http://example.com/foo"]

    def __init__(self):
        # Browser-like headers plus a captured login cookie; Tmall blocks
        # cookieless/headless requests.  NOTE(review): these session cookies
        # are hard-coded and long expired — they must be refreshed to crawl.
        self.headers = {
            'Host': 'detail.tmall.com',
            'user-Agent': 'Mozilla/5.0 (windows NT 10.0; WOW64; rv:44.0) GECko/20100101 Firefox/44.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'Accept-Encoding': 'gzip, deflate, br',
            'Referer': 'https://list.tmall.com/search_product.htm?q=iphone',
            'Cookie': 'hng=CN%7Czh-cn%7CCNY; l=AmFhUQz9l9Bm0s1PIcUbVzUrUSd709Vr; pnm_cku822=213UW5TcyMNYQwiAiwTR3tCf0J%2FQnhEcUpkMmQ%3D%7CUm5OcktzSHFMdkpwTXFEcSc%3D%7CU2xMHDJ%2BH2QJZwBxX39RaFF%2FX3EtTCpGIV8lC10L%7CVGhXd1llXGRfZlthXWdaZlNmUWxOdEpxRXhMeUx0QHhMckh2Qmw6%7CVWldfS0SMg4zBycbJAQqAXRfeB9kNFY0EDtEajxq%7CVmhIGCwSMg8vEycaJAQ6DzQIKBQgHyICPgM2CysXIxwhAT0AOQRSBA%3D%3D%7CV25Tbk5zU2xMcEl1VWtTaUlwJg%3D%3D; cna=c7xUD5TeoxgCARsmEAVdwH4E; cq=ccp%3D1; t=ea7cda7b4dd7d94c574c51a61cd68bf6; uc3=nk2=G4mgLCRZx6no8qfi5g%3D%3D&id2=UonZBtTqYSCQGg%3D%3D&vt3=F8dAscn1mkMKfq3pmos%3D&lg2=W5iHLLyFOGW7aA%3D%3D; lgc=xiaowenjie886; tracknick=xiaowenjie886; _tb_token_=WcXcAjsXNiib; cookie2=3647140634e8134de4621d27d06a6239; OZ_1U_2061=vid=v6cf00b635ac22.0&ctime=1456406710&ltime=0; OZ_1Y_2061=erefer=https%3A//list.tmall.com/search_product.htm%3Fq%3D%25CD%25E2%25CC%25D7%25C4%25D0%26click_id%3D%25CD%25E2%25CC%25D7%25C4%25D0%26from%3Dmallfp..pc_1.0_hq%26spm%3D875.7789098.a1z5h.1.1DJapJ&eurl=https%3A//detail.tmall.com/item.htm%3Fspm%3Da220m.1000858.1000725.11.XG2djx%26id%3D525068649325%26skuId%3D3125134725161%26areaId%3D440300%26cat_id%3D50025174%26rn%3D020410dd2019f68eaf3d848b4d14552f%26user_id%3D196993935%26is_b%3D1&etime=1456406710&ctime=1456406710&ltime=0&compid=2061',
            'Connection': 'keep-alive',
            'cache-Control': 'max-age=0',
        }
        # Captured login session cookies (also expired — see NOTE above).
        self.cookies = {
            'l': 'ArGxZLdew/Qq2hKqnZPLZoKK4TdLHyUb',
            'cna': 'OW9VD5ReU2Acadxw7hJSgV4y',
            'cookie2': '1cfecc6ae5749b36804d524b9d0cccb4',
            't': '2fd2137e54b753c57bec7b945f504547',
            '_tb_token_': 'l0ckiPAV9KXX',
            'ck1': '',
            'uc1': 'cookie14=UoWyiPlLPWymJA%3D%3D&existShop=false&cookie16=U%2BGCWk%2F74Mx5tgzv3dWpnhjPaQ%3D%3D&cookie21=WqG3DMC9EdFmJgke4t0pDw%3D%3D&tag=3&cookie15=VT5L2FSpMGV7TQ%3D%3D&pas=0',
            'uc3': 'nk2=G4mgLCRZx6no8qfi5g%3D%3D&id2=UonZBtTqYSCQGg%3D%3D&vt3=F8dAScn1nphE%2FG5b7yQ%3D&lg2=Vq8l%2BKCLz3%2F65A%3D%3D',
            'lgc': 'xiaowenjie886',
            'tracknick': 'xiaowenjie886',
            'cookie1': 'UNaG7hUVmBqzT5U4J5xH8HeBiBsUUL0QGHEE%2BJc503Q%3D',
            'unb': '1821174258',
            'skt': '116663449cdcca0c',
            '_nk_': 'xiaowenjie886',
            '_l_g_': 'Ug%3D%3D',
            'cookie17': 'UonZBtTqYSCQGg%3D%3D',
            'hng': 'CN%7Czh-cn%7CCNY',
            'login': 'true',
            'pnm_cku822': 'pnm_cku822=213UW5TcyMNYQwiAiwTR3tCf0J%2FQnhEcUpkMmQ%3D%7CUm5OcktzSHFMdkpwTXFEcSc%3D%7CU2xMHDJ%2BH2QJZwBxX39RaFF%2FX3EtTCpGIV8lC10L%7CVGhXd1llXGRfZlthXWdaZlNmUWxOdEpxRXhMeUx0QHhMckh2Qmw6%7CVWldfS0SMg4zBycbJAQqAXRfeB9kNFY0EDtEajxq%7CVmhIGCwSMg8vEycaJAQ6DzQIKBQgHyICPgM2CysXIxwhAT0AOQRSBA%3D%3D%7CV25Tbk5zU2xMcEl1VWtTaUlwJg%3D%3D; expires=Sat, 26 Mar 2016 13:32:50 GMT; path=/; domain=detail.tmall.com',
        }
        # Search URL for the query 秋季打底衫 ("autumn base-layer shirt").
        self.url = 'https://s.taobao.com/search?spm=a21bo.7724922.8452-fline.1.uFDF4G&q=秋季打底衫'

    def start_requests(self):
        """Issue the initial search request through Splash.

        The Lua script loads the page, waits 1 s for JS to render, and
        returns the final HTML.  Routed to :meth:`parse_result`.
        """
        script = """
function main(splash)
  assert(splash:go(splash.args.url))
  splash:wait(1.0)
  return splash:html()
end
        """
        # fixed: keyword was ``Meta=`` which raises TypeError (case-sensitive).
        yield scrapy.Request(self.url, self.parse_result, meta={
            'splash': {
                'args': {'lua_source': script, 'url': self.url},
                'endpoint': 'execute',
            }
        })

    def parse_result(self, response):
        """Read the total page count from the pager, then request every page.

        Taobao paginates with the ``s`` query parameter in steps of 44 items
        per page.  Each page request is rendered via Splash (8.5 s wait) and
        routed to :meth:`parse_next`.
        """
        pageCountXpath = response.xpath("//div[@class='pager']/ul/li[2]/text()").extract()
        # Pager text looks like e.g. "共100页"; drop the leading character to
        # get the digits.  NOTE(review): assumes a single-character prefix —
        # confirm against the live page markup.
        page = (','.join(pageCountXpath))[1:]
        pagecount = int(page)
        script = """
function main(splash)
    assert(splash:go(splash.args.url))
    assert(splash:wait(8.5))
    return splash:html()
end
        """
        for i in range(0, 44 * pagecount, 44):
            url2 = 'https://s.taobao.com/search?q=秋季打底衫&s=%d' % i
            yield scrapy.Request(url2, self.parse_next, meta={
                'splash': {
                    'args': {'lua_source': script, 'url': url2},
                    'endpoint': 'execute',
                }
            })

    def parse_next(self, response):
        """Extract item titles and shop names from one rendered result page.

        Returns a single :class:`TaobaoItem` whose ``title`` and ``shopname``
        fields hold the full lists extracted from the page.
        """
        item = TaobaoItem()
        titleALL = response.xpath("//div[@class='item  ']/div[2]/div[2]/a/text()").extract()
        item['title'] = titleALL
        shopnameAll = response.xpath("//a[@class='shopname J_MouseEneterLeave J_ShopInfo']/span[2]/text()").extract()
        item["shopname"] = shopnameAll
        return item

# If Splash stops responding: sudo service docker restart

python 淘宝爬虫示例源码(抓取天猫数据)

评论


亲,登录后才可以留言!

来源:python 淘宝爬虫示例源码(抓取天猫数据)

大猪猪源码素材网所有素材均为本站用户上传，仅供学习与参考，请勿用于商业用途。如有侵犯您的版权，请联系客服QQ处理。
本站提供各类程序源码素材。
由于技术有限本站不提供安装服务与bug修复,各类源码只提供分享服务,感谢您的理解。
由于代码类属可复制类,下载后概不退款,请知悉。
如果对本站有任何意见请点击右侧侧边栏的反馈意见,我们会及时处理。

相似素材

本站会员

这个家伙很懒未写签名~