当前位置：首页 > news >正文

实战演练：利用京东API一键抓取商品详情

news 2026/6/4 1:23:58

🛒 京东商品详情抓取实战

📋 目录

- [准备工作](#准备工作)
- [方案一](#方案一)
- [方案二](#方案二)
- [完整代码](#完整代码)
- [数据解析](#数据解析)

🚀 准备工作

1️⃣ 申请京东联盟API权限

bash

# 注册地址：https://union.jd.com/
# 步骤：
# 1. 注册账号 → 2. 创建应用 → 3. 获取 AppKey 和 AppSecret

2️⃣ 安装依赖

bash

# hashlib, time and json ship with Python's standard library and must not be
# pip-installed; only the third-party packages used by the snippets are needed.
pip install requests beautifulsoup4

⭐ 方案一：使用京东联盟API（推荐✅）

完整代码实现

python

import requests
import hashlib
import time
import json
from urllib.parse import quote


class JDApiError(Exception):
    """Raised when the JD API answers without the data that was requested."""


class JDApiClient:
    """Small client for the JD Union open API.

    The client caches the access token in memory and signs every request
    with an MD5 signature derived from the app secret.

    NOTE(review): the endpoint URLs, the token flow and the two API method
    names below are taken verbatim from the tutorial and are not verified
    against the official JD Union documentation. The method names use
    inconsistent prefixes ("jingdong." vs "jd.") -- confirm before use.
    """

    TOKEN_URL = "https://api.jd.com/token"
    ROUTER_URL = "https://api.jd.com/routerjson"

    def __init__(self, app_key, app_secret, timeout=10):
        """Store the credentials and start with an empty token cache.

        :param app_key: application key issued for the app
        :param app_secret: application secret used for signing
        :param timeout: per-request timeout in seconds, so a stalled
            connection cannot block the caller forever
        """
        self.app_key = app_key
        self.app_secret = app_secret
        self.timeout = timeout
        self.access_token = None
        self.token_expire = 0

    def _get_timestamp(self):
        """Return the current Unix time in milliseconds as a string."""
        return str(int(time.time() * 1000))

    def _get_sign(self, params):
        """Return the upper-case MD5 signature for ``params``.

        The signed string is the app secret, then every ``key`` + ``value``
        pair in ascending key order, then the app secret again.
        """
        payload = "".join(f"{k}{v}" for k, v in sorted(params.items()))
        sign_str = f"{self.app_secret}{payload}{self.app_secret}"
        return hashlib.md5(sign_str.encode()).hexdigest().upper()

    def get_access_token(self):
        """Return a valid access token, requesting a new one when needed.

        A cached token is reused until 300 seconds before its reported
        expiry.

        :raises JDApiError: if the response carries no ``access_token``
            (``JDApiError`` subclasses ``Exception``)
        """
        if self.access_token and time.time() < self.token_expire:
            return self.access_token

        params = {
            "grant_type": "client_credentials",
            "app_key": self.app_key,
            # NOTE(review): the secret travels in the query string here;
            # confirm this is what the token endpoint really expects.
            "app_secret": self.app_secret,
            "timestamp": self._get_timestamp(),
            "sign_method": "md5",
        }
        params["sign"] = self._get_sign(params)
        response = requests.get(self.TOKEN_URL, params=params, timeout=self.timeout)
        data = response.json()

        if "access_token" not in data:
            raise JDApiError(f"获取Token失败: {data}")

        self.access_token = data["access_token"]
        # float() tolerates an expires_in delivered as a numeric string.
        self.token_expire = time.time() + float(data["expires_in"]) - 300
        return self.access_token

    def _call(self, method, business_params):
        """Send one signed GET request to the router endpoint.

        :param method: API method name placed in the ``method`` parameter
        :param business_params: method-specific parameters merged after the
            common system parameters
        :return: the decoded JSON response body
        """
        params = {
            "method": method,
            "app_key": self.app_key,
            "access_token": self.get_access_token(),
            "timestamp": self._get_timestamp(),
            "format": "json",
            "v": "2.0",
            "sign_method": "md5",
            **business_params,
        }
        # The signature must be computed last: it covers every other parameter.
        params["sign"] = self._get_sign(params)
        response = requests.get(self.ROUTER_URL, params=params, timeout=self.timeout)
        return response.json()

    def get_goods_detail(self, sku_id):
        """Fetch the detail record of a product.

        :param sku_id: product SKU id (the JD product id)
        :return: the decoded JSON response body
        """
        return self._call("jingdong.union.open.goods.query", {"skuIds": sku_id})

    def get_goods_promotion(self, sku_id):
        """Fetch promotion information (price, coupons, ...) of a product.

        :param sku_id: product SKU id (the JD product id)
        :return: the decoded JSON response body
        """
        return self._call("jd.union.open.goods.promotion.get", {"skuId": sku_id})


# Usage example
if __name__ == "__main__":
    # Replace with your own AppKey and AppSecret
    APP_KEY = "your_app_key_here"
    APP_SECRET = "your_app_secret_here"

    client = JDApiClient(APP_KEY, APP_SECRET)

    # Example: fetch the details of an iPhone 15.
    # The SKU id is part of the product URL:
    # item.jd.com/100038004356.html -> 100038004356
    sku_id = "100038004356"

    print("🔍 正在获取商品详情...")
    detail = client.get_goods_detail(sku_id)
    print(json.dumps(detail, ensure_ascii=False, indent=2))

    print("\n💰 正在获取促销信息...")
    promo = client.get_goods_promotion(sku_id)
    print(json.dumps(promo, ensure_ascii=False, indent=2))

🔧 方案二：直接爬取（无需API Key）

python

import requests
from bs4 import BeautifulSoup
import json
import re

# Matches the assignment that precedes the embedded state object. The
# lookahead keeps the original requirement that the value starts with "{".
_INITIAL_STATE_RE = re.compile(r'window\.__INITIAL_STATE__\s*=\s*(?=\{)')


class JDCrawler:
    """Product crawler that works without API credentials."""

    def __init__(self, timeout=10):
        """Prepare the request headers.

        :param timeout: per-request timeout in seconds, so a stalled
            connection cannot block the caller forever
        """
        self.timeout = timeout
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'application/json, text/plain, */*',
            'Referer': 'https://www.jd.com/'
        }

    @staticmethod
    def _extract_initial_state(text):
        """Return the object assigned to ``window.__INITIAL_STATE__`` or None.

        The JSON decoder itself finds the end of the object. A non-greedy
        regex such as ``({.*?});`` would stop at the first ``};`` inside a
        nested value and hand truncated JSON to the parser.

        :raises json.JSONDecodeError: if the assigned value is not valid JSON
        """
        marker = _INITIAL_STATE_RE.search(text)
        if marker is None:
            return None
        state, _end = json.JSONDecoder().raw_decode(text, marker.end())
        return state

    def get_product_json(self, item_id):
        """Extract the embedded JSON state from a product page.

        :param item_id: product id (taken from the product URL)
        :return: the decoded state object, or None when no script on the
            page assigns ``window.__INITIAL_STATE__``
        :raises json.JSONDecodeError: if the embedded state is not valid JSON
        """
        url = f"https://item.jd.com/{item_id}.html"
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Look through the page scripts for the embedded JSON data.
        for script in soup.find_all('script', type='text/javascript'):
            text = script.string
            if not text:
                continue
            state = self._extract_initial_state(text)
            if state is not None:
                return state
        return None

    def get_product_api(self, item_id):
        """Query the p.3.cn price endpoint for one product.

        The original tutorial describes this route as the more stable one.

        :param item_id: product id (taken from the product URL)
        :return: the decoded JSON response body
        """
        url = f"https://p.3.cn/prices/mgets?skuIds=J_{item_id}"
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        return response.json()


# Usage example
if __name__ == "__main__":
    crawler = JDCrawler()

    # Fetch an iPhone 15
    item_id = "100038004356"

    print("📦 方法1: 页面解析")
    data = crawler.get_product_json(item_id)
    if data:
        # NOTE(review): the pcData/productInfo layout is assumed by the
        # tutorial and not verified; fall back instead of raising KeyError.
        info = data.get('pcData', {}).get('productInfo', {})
        print(f"商品名: {info.get('name', 'N/A')}")
        print(f"价格: ¥{info.get('price', 'N/A')}")

    print("\n💰 方法2: 价格API")
    price_data = crawler.get_product_api(item_id)
    print(json.dumps(price_data, ensure_ascii=False, indent=2))

🎯 完整实战：批量抓取 + 数据存储

python

import requests
import json
import csv
import re
from datetime import datetime
import time

# Compiled once; both patterns are applied to every product page.
_NAME_RE = re.compile(r'<h1 class="name">(.*?)</h1>')
_SHOP_RE = re.compile(r'class="name".*?>(.*?)</a>')


class JDBatchCrawler:
    """Batch product crawler that stores its results in a CSV file."""

    def __init__(self, timeout=10, delay=1):
        """Prepare the request headers.

        :param timeout: per-request timeout in seconds
        :param delay: pause in seconds between two products, to avoid
            sending requests too quickly
        """
        self.timeout = timeout
        self.delay = delay
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

    def get_product_info(self, item_id):
        """Fetch price and page details for a single product.

        :param item_id: product id
        :return: dict with ``item_id``, ``name``, ``shop``, ``price`` and
            ``crawl_time``; None when the request or the parsing failed
        """
        try:
            # Price endpoint
            url = f"https://p.3.cn/prices/mgets?skuIds=J_{item_id}"
            resp = requests.get(url, headers=self.headers, timeout=self.timeout)
            price_data = resp.json()

            # Product detail page
            detail_url = f"https://item.jd.com/{item_id}.html"
            detail_resp = requests.get(detail_url, headers=self.headers, timeout=self.timeout)

            # Pull the key fields out of the HTML.
            name_match = _NAME_RE.search(detail_resp.text)
            shop_match = _SHOP_RE.search(detail_resp.text)

            return {
                'item_id': item_id,
                'name': name_match.group(1) if name_match else 'N/A',
                'shop': shop_match.group(1) if shop_match else 'N/A',
                'price': price_data[0].get('p', 'N/A') if price_data else 'N/A',
                'crawl_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            }
        except (requests.RequestException, ValueError, LookupError,
                AttributeError, TypeError) as e:
            # Only network failures and malformed responses are treated as
            # "this item failed". Programming errors such as NameError must
            # surface instead of silently failing every item.
            print(f"❌ 抓取失败 {item_id}: {e}")
            return None

    def batch_crawl(self, item_ids, output_file='products.csv'):
        """Fetch several products and write the successful ones to CSV.

        :param item_ids: iterable of product ids
        :param output_file: path of the CSV file to write; the file is only
            written when at least one product was fetched
        :return: list of the product dicts that were fetched successfully
        """
        item_ids = list(item_ids)  # allow any iterable, not only sequences
        results = []

        for i, item_id in enumerate(item_ids, 1):
            print(f"📊 [{i}/{len(item_ids)}] 正在抓取: {item_id}")
            product = self.get_product_info(item_id)
            if product:
                results.append(product)
            if i < len(item_ids):
                time.sleep(self.delay)  # throttle; no need to wait after the last item

        # Save to CSV; utf-8-sig adds a BOM to the file.
        if results:
            with open(output_file, 'w', newline='', encoding='utf-8-sig') as f:
                writer = csv.DictWriter(f, fieldnames=list(results[0]))
                writer.writeheader()
                writer.writerows(results)
            print(f"\n✅ 成功抓取 {len(results)} 个商品，已保存到 {output_file}")

        return results


# Run example
if __name__ == "__main__":
    # Product id list
    item_ids = [
        "100038004356",  # iPhone 15
        "100012043978",  # MacBook Pro
        "100026789012",  # AirPods Pro
    ]

    crawler = JDBatchCrawler()
    crawler.batch_crawl(item_ids)

📊 数据解析示例

python

import json

# Simulated API response
api_response = {
    "code": "0",
    "msg": "成功",
    "data": {
        "goodsInfo": {
            "skuId": "100038004356",
            "spuId": "100038004356",
            "name": "Apple iPhone 15 (A3092) 128GB 蓝色",
            "shopId": "1000001",
            "categoryId": "9987",
            "brandId": "1320",
            "price": "5999.00",
            "jdPrice": "5999.00",
            "promotionPrice": "5499.00",
            "couponPrice": "5299.00",
            "commissionRate": "1.5",
            "commission": "89.99",
        }
    }
}


def format_goods_info(goods):
    """Return the six report lines for one ``goodsInfo`` record.

    :param goods: mapping with the keys ``name``, ``jdPrice``,
        ``promotionPrice``, ``couponPrice``, ``commissionRate`` and
        ``commission``
    :raises KeyError: if one of those keys is missing
    """
    return [
        f"🏷️ 商品名称: {goods['name']}",
        f"💰 京东价: ¥{goods['jdPrice']}",
        f"🔥 促销价: ¥{goods['promotionPrice']}",
        f"🎫 优惠券价: ¥{goods['couponPrice']}",
        f"📈 佣金比例: {goods['commissionRate']}%",
        f"💵 预估佣金: ¥{goods['commission']}",
    ]


# Parse the data
data = api_response["data"]["goodsInfo"]
for line in format_goods_info(data):
    print(line)

⚠️ 注意事项

事项	说明
🔐频率限制	API每秒最多10次请求，爬虫建议间隔1-2秒
🛡️反爬机制	添加随机User-Agent，使用代理IP
📜合规使用	仅用于学习研究，遵守robots.txt
🔑Token有效期	Access Token通常2小时过期，需刷新

🎁 快速开始模板

python

# Copy-and-run template (replace APP_KEY and APP_SECRET with your own values).
# NOTE(review): the import below presumes the JDApiClient class from this
# article has been saved as jd_api.py next to this script -- confirm.
from jd_api import JDApiClient

client = JDApiClient("your_app_key", "your_app_secret")
data = client.get_goods_detail("100038004356")
print(data)

需要我详细讲解某个部分吗？比如：