仓库源文站点原文

<!--more--> <p>爬取所有期刊的ISSN、期刊名、复合IF、综合IF、点评、查看，以及指定期刊的研究方向、投稿录用比、审稿速度、审稿费用、版面费用。需要注意的是，这些数据都是投过该期刊的同学自行公布的投稿经历，仅供参考。</p>

{% code %}
import requests
from lxml import html

# One shared HTTP session so the login cookies are reused by every later request.
conn = requests.session()

# Browser-like User-Agent sent with every request made through `conn`.
headers = {
    'User-Agent': ' '.join((
        'Mozilla/5.0 (Windows NT 10.0; WOW64)',
        'AppleWebKit/537.36 (KHTML, like Gecko)',
        'Chrome/63.0.3239.26',
        'Safari/537.36 Core/1.63.5733.400',
        'QQBrowser/10.2.2019.400',
    )),
}

# 验证码计算

def cal(sen):
    """Solve the arithmetic captcha question and return the integer answer.

    The arithmetic expression is the text between the first ':' and the
    following '等于' ("equals"), e.g. '问题:3加5等于多少' -> '3加5'.

    Args:
        sen: The full captcha prompt text.

    Returns:
        The result as an int (division results are truncated by ``int``).
        Returns 0 when no known operator word is found in the expression.

    Raises:
        IndexError: If ``sen`` contains no ':' separator.
        ValueError: If an operand is not an integer literal.
    """
    expression = sen.split(':')[1].split('等于')[0]

    # Operator words in the order the checks are tried:
    # 加 = plus, 减 = minus, 乘以 = times, 除以 = divided by.
    operations = (
        ('加', lambda a, b: a + b),
        ('减', lambda a, b: a - b),
        ('乘以', lambda a, b: a * b),
        ('除以', lambda a, b: a / b),
    )
    for word, apply in operations:
        # Look for the operator in the expression itself, not in the whole
        # prompt: an operator character elsewhere in the prompt text would
        # otherwise select the wrong branch and make int() fail.
        if word in expression:
            left, right = expression.split(word)[:2]
            return int(apply(int(left), int(right)))
    return 0

# 中文期刊

def all_journal():
    """Log in to muchong.com and print every row of the Chinese journal list.

    Steps:
      1. POST the login form.
      2. Parse the arithmetic verification question from the response,
         solve it with ``cal`` and POST the answer.
      3. Fetch the first page of the journal list and pages 2 through 22,
         printing the whitespace-split text of each ``tbody`` row.

    Uses the module-level ``conn`` session and ``headers``. Returns None.
    """
    # Log in.
    login_url = 'http://muchong.com/bbs/logging.php?action=login'
    login_form = {
        'formhash': 'da8aadbd',
        'username': '******',
        'password': '******',
        'cookietime': 31536000,
        'refer': '',
        # NOTE(review): this looks like a placeholder left by a failed
        # decode of the original submit value - confirm the real value.
        'loginsubmit': '(unable to decode value)'
    }
    login_response = conn.post(login_url, data=login_form, headers=headers)

    # Answer the verification question shown after the first POST.
    challenge = html.fromstring(login_response.text)
    question = challenge.xpath('//form[@name="input"]/div/text()')[0]
    formhash = challenge.xpath('//input[@name="formhash"]/@value')[0]
    post_sec_hash = challenge.xpath('//input[@name="post_sec_hash"]/@value')[0]
    verify_form = {
        'formhash': formhash,
        'post_sec_code': cal(question),
        'post_sec_hash': post_sec_hash,
        'username': '******',
        'loginsubmit': '(unable to decode value)',
    }
    conn.post(login_url, data=verify_form, headers=headers)

    # First page of the journal list: rows are under the 8th "wrapper" div.
    list_url = 'http://muchong.com/bbs/journal_cn.php'
    _print_journal_rows(list_url, 8)

    # Remaining pages (2..22): rows are under the 6th "wrapper" div.
    # The query string uses plain '&' separators; HTML-escaped '&amp;' would
    # turn the keys into 'amp;view', 'amp;page', ... and lose the page number.
    for page in range(2, 23):
        page_url = (list_url
                    + '?from=emuch&view=&classid=0&class_credit=0&page='
                    + str(page))
        _print_journal_rows(page_url, 6)


def _print_journal_rows(url, wrapper_index):
    """Fetch one journal-list page and print each row as a list of tokens."""
    response = conn.get(url, headers=headers)
    page = html.fromstring(response.text)
    rows = page.xpath(
        '//div[@class="wrapper"][%d]'
        '/div[@class="forum_body forum_body_journal"]//tbody' % wrapper_index
    )
    for row in rows:
        print(row.xpath('string(.)').split())

def journal_name(name):
    """Search the Chinese journal list by name and print details of each hit.

    Posts the search form, then for every result row prints the detail-page
    URL and calls ``detail`` on it.

    Args:
        name: Journal name as ``str``; it is encoded to GBK before posting.

    Uses the module-level ``conn`` session and ``headers``. Returns None.
    """
    url = 'http://muchong.com/bbs/journal_cn.php'
    postdata = {
        'issn': '',
        'tagname': '',
        'name': name.encode("GBK"),
        # NOTE(review): this looks like a placeholder left by a failed
        # decode of the original submit value - confirm the real value.
        'ssubmit': '(unable to decode value)',
        'accept-charset': "utf-8"
    }
    rep = conn.post(url, data=postdata, headers=headers)
    qikan = html.fromstring(rep.text)
    every_qikan = qikan.xpath(
        '//div[@class="wrapper"][6]'
        '/div[@class="forum_body forum_body_journal"]//tbody'
    )
    for row in every_qikan:
        links = row.xpath('tr/th/a/@href')
        if not links:
            # A row without a detail link has nothing to follow; skip it
            # instead of raising IndexError on links[0].
            continue
        detail_url = 'http://muchong.com/bbs/' + links[0]
        print(detail_url)
        detail(detail_url)

def detail(url):
    """Fetch a journal detail page and print each row of its info table.

    Args:
        url: Absolute URL of the journal's detail page.

    Uses the module-level ``conn`` session and ``headers``. Returns None.
    """
    response = conn.get(url, headers=headers)
    page = html.fromstring(response.text)

    # Data contributed by forum users ("虫友提供资料").
    info_rows = page.xpath(
        '//div[@class="wrapper"][4]/div[@class="forum_explan bg_global"][2]//tr'
    )
    for row in info_rows:
        # Flatten the row to text and print it as whitespace-separated tokens.
        print(row.xpath('string(.)').split())

print('----------指定期刊----------') journal_name('中文信息学报') print('----------所有核心期刊----------') all_journal() {% endcode %}

<p>运行结果</p>

{% asset_img xiaomuchong.png %}

<p>1、需要更改自己的用户名和密码</p><p>2、如果还有需要的功能可以留言增加</p>