少女祈祷中...

MyBookShelf-python


本篇概述:用python自定义阅读酷1000+书源并导出生成短链接


1、

（完整代码如下）
import requests
import time
import re
from lxml import etree

# Endpoint: favorite ("收藏") a single book source
post_url = 'http://ku.iszoc.com/index/sign.html'
# Endpoint: export all favorited sources as a JSON file
export_url = 'http://ku.iszoc.com/user/Favorites/export.html'
# Search/listing page — used to read the max page count of all book sources
search_url = "http://ku.iszoc.com/index/search.html"
# Favorites listing page — used to read the max page count of favorited sources
fa_index_url = "http://ku.iszoc.com/user/favorites/index.html"
# Endpoint: bulk-delete favorited sources
del_url = "http://ku.iszoc.com/user/Favorites/selectDel.html"
# Request headers.
# NOTE(review): the Cookie below is a hard-coded session id — replace it with
# your own PHPSESSID before running (see the notes under __main__).
headers = {
    "Cookie":"PHPSESSID=c3a21362dc3036a515db860e095a1ca4; Hm_lvt_ea5e026ac2ed0205ce7a6417bbd1dcef=1571831331,1572157850,1572790358,1573480142; Hm_lpvt_ea5e026ac2ed0205ce7a6417bbd1dcef=1573481354",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3641.400 QQBrowser/10.4.3284.400"
}
def max(url):
    '''
    Return the last page number of the paginated listing at *url*.

    NOTE(review): this function shadows the builtin ``max`` for the rest of
    the module — consider renaming it (e.g. ``max_page``) together with its
    callers in ``work()`` and ``export()``.

    The ``[-3]`` index assumes the pagination widget's third-from-last <a>
    holds the highest page number (followed by "next"/"last" links) —
    verify against the site's current markup.
    '''
    html = req_page_html(url=url)
    obj = etree.HTML(html)
    num = obj.xpath(".//ul[@class='pagination']//li/a/text()")[-3]
    return int(num)

def req_page_html(url, num=3):
    '''
    POST *url* and return the response body text (one listing page holds
    15 book sources).

    Retries up to *num* more times on any request error. Returns ``None``
    once retries are exhausted.

    Bug fixed: the original fell through to ``return resp`` after the retry
    budget was spent, but ``resp`` was never assigned on that path, raising
    UnboundLocalError instead of failing gracefully.
    '''
    try:
        return requests.post(url, headers=headers, timeout=10).text
    except Exception as e:
        if num > 0:  # retry up to 3 times
            return req_page_html(url, num=num - 1)
        # out of retries: report and signal failure explicitly
        print(url)
        print(e)
        return None

def req(post_id, num=3):
    '''
    Favorite the book source identified by *post_id* and return the
    response body text.

    Retries up to *num* more times on any request error. Returns ``None``
    once retries are exhausted.

    Bug fixed: the original fell through to ``return resp`` after the retry
    budget was spent, but ``resp`` was never assigned on that path, raising
    UnboundLocalError instead of failing gracefully.
    '''
    data = {
        "type": "yes",
        "id": post_id
    }
    try:
        return requests.post(post_url, headers=headers, data=data, timeout=10).text
    except Exception as e:
        if num > 0:  # retry up to 3 times
            return req(post_id, num=num - 1)
        # out of retries: report and signal failure explicitly
        print(post_id)
        print(e)
        return None

def parse_div(i,div):
    '''
    Extract metadata from one book-source <div> on a listing page.

    Returns a 2-tuple:
      ([clicks, comments, bookmark, update_time, author, search, audio],
       bookmark)
    Only ``bookmark`` (收藏 status) is actually used by the caller; the rest
    is informational — the marked section below may be commented out, in
    which case return just ``bookmark`` (see note near the end).

    *i* is the current page number (kept in the signature for the caller;
    unused in the body).
    '''
    # Regex of characters to strip from scraped text (spaces, the literal
    # "&nbsp" entity characters, CR/LF).
    r = re.compile("[ &nbsp\r\n ]")
    # Each source div has 5 <span>s; some have 6.
    span_list = div.xpath("./div[@class='layui-card-body']/span")

    # ================ ↓ may be commented out ↓ ====================
    # Click count
    dianji_count = span_list[0].xpath("./span/text()")[0]
    dianji_count = re.sub(r,'',dianji_count).replace(" ","")
    # Comment count
    comment_count = span_list[1].xpath("./span/text()")[0]
    comment_count = re.sub(r,'',comment_count)

    # Last-update time; re-insert the space between date and time that the
    # strip regex removed.
    update_time = span_list[3].xpath("./span/text()")[0]
    update_time = re.sub(r,'',update_time)
    update_time = update_time[:10] + ' ' +update_time[10:]
    # Author
    author = span_list[4].xpath("./span/text()")[0]
    author = re.sub(r,"",author)
    # The following attributes may be absent on some divs.
    # "带发现" (has discovery/search feed)
    try:
        search = span_list[5].xpath("./span/text()")[0]
    except:
        search = "无带发现"
    search = re.sub(r,"",search)
    # Audio source.
    # NOTE(review): same index [5] as `search` above — looks like a
    # copy-paste slip (audio may live at [6] when a 7th span exists).
    # Verify against the page markup before changing.
    try:
        audio = span_list[5].xpath("./span/text()")[0]
    except:
        audio = "无音频源"
    audio = re.sub(r,"",audio)
    # ================ ↑ may be commented out ↑ ====================
    # Bookmark (收藏) status — the value the caller branches on.
    bookmark = span_list[2].xpath("./span/text()")[0]
    bookmark = re.sub(r,"",bookmark)

    # If the section above is commented out, use this return instead:
    # return bookmark
    return [dianji_count,comment_count,bookmark,update_time,author,search,audio],bookmark

def work():
    '''
    Walk every search-result page and favorite each book source that is
    not already bookmarked. Sleeps briefly between sources and pages to
    stay polite to the site.
    '''
    last_page = max(search_url)
    for page in range(1, last_page + 1):
        page_url = 'http://ku.iszoc.com/index/search.html?page={}'.format(page)
        tree = etree.HTML(req_page_html(page_url))
        # Skip the first div: it is the site announcement, not a source.
        divs = tree.xpath("//div[@class='layui-card-body']/div")[1:]

        for div in divs:
            # The source id is the last path segment of the link, minus ".html".
            href = div.xpath("./a/@href")[0]
            post_id = href.split('.')[0].split('/')[-1]
            _, bookmark = parse_div(page, div)
            # print(post_id, bookmark)
            if bookmark == '未收藏':
                reply = req(post_id)
                if '收藏成功' in reply:
                    print(page, post_id, '收藏成功')
                else:
                    print(reply)
                    print(page, post_id, 'error')
            elif bookmark == '已收藏':
                print(page, post_id, '已收藏')

            time.sleep(0.5)

            # break
        time.sleep(5)
        # print('\n')
# print('\n')
def export():
    '''
    Collect the value ids of every favorited book source, persist the
    comma-joined list to a text file, then POST it to the export endpoint
    and print the server's reply (the generated JSON / link).
    '''
    collected = []
    for page in range(1, max(fa_index_url) + 1):
        fa_url = 'http://ku.iszoc.com/user/favorites/index.html?page={}'.format(page)
        tree = etree.HTML(req_page_html(fa_url))
        collected.extend(tree.xpath("//input[@name='key[]']/@value"))
        # print(collected)
        time.sleep(0.5)
        # break

    # Every id is followed by a comma (trailing comma included), matching
    # what the site's export endpoint expects.
    value_id = ''.join('{},'.format(v) for v in collected)

    with open('1008_values_id_list.txt', 'w') as f:
        f.write(value_id)

    data = {
        'id': value_id
    }
    # To bulk-delete all favorited sources instead, use this line:
    # resp = requests.post(del_url,headers=headers,data=data,timeout=10).text
    # Bulk-export all favorited sources:
    resp = requests.post(export_url, headers=headers, data=data, timeout=10).text
    print(resp)

if __name__ == '__main__':
    '''
    1、使用前清获取自己的cookie填入headers中的cookies字段中
    2、如需删除所有收藏的书源,将下面三行注释
    再将export()方法中post的url由export_url改为del_url即可
    '''
    # Usage (translation of the notes above):
    # 1. Before running, put your own cookie into the "Cookie" field of `headers`.
    # 2. To delete all favorited sources instead, comment out the three
    #    "favorite" lines below and change the POST url inside export()
    #    from export_url to del_url.
    print("="*30,"开始收藏","="*30)
    work() # favorite every listed book source
    print("="*30,"收藏完成","="*30)

    print("="*30,"开始获取书源地址","="*30)
    export() # collect all value ids and generate the JSON export
    print("="*30,"获取完成","="*30)

2、

3、

-------------本文结束感谢您的阅读-------------

本文标题:MyBookShelf-python

文章作者:Coder-Sakura

发布时间:2019年11月12日 - 00:20:35

最后更新:2019年12月22日 - 11:25:12

原始链接:https://coder-sakura.github.io/blog/2019/11/12/mybookshelf-python/

许可协议: 署名-非商业性使用-禁止演绎 4.0 国际 转载请保留原文链接及作者。