- Stuff
- Tuesday, November 25th, 2008 at 11:30:52pm MST
- #!/usr/bin/env python
- # -*- coding: UTF-8 -*-
- #Author: Roy L Zuo (roylzuo at gmail dot com)
- #Last Change: Wed Nov 26 12:37:24 2008 EST
- #Description: 根据yingjiesheng搜索关键字结果,群发简历,并保存已投
- # 职位具体信息至指定文件夹
- import urllib2, re, os, shelve, time
# Keyword combinations to search for. Each inner list is one query; its
# words are joined into a single search by getLatestJobs().
searches = [
    ['linux', 'python'],
    ['unix', 'python'],
    ['linux', '金融'],
    ['unix', '金融'],
    ['linux', 'finance'],
    ['unix', 'finance'],
]

# Root folder for the saved job pages and the shelve databases.
# expanduser('~') yields $HOME when it is set, but unlike
# os.environ['HOME'] it does not raise KeyError when HOME is missing
# from the environment.
savepath = '%s/workspace/career/buster' % os.path.expanduser('~')
def getLatestJobs(keywords):
    '''Search yingjiesheng for *keywords* and return the result links.

    keywords: list of strings; they are joined with '+' into one query.

    Returns a list holding the href of every result title found on all
    result pages, or None when the first page carries no record counter
    (the "共找到...条记录" text).

    Raises whatever urllib2.urlopen raises on network errors, and
    ValueError when the text captured from the counter is not an integer.
    '''
    #TODO: compare with saved pages, so that only jobs not yet applied
    # to are returned (not implemented; every link found is returned)
    # The start offset advances in steps of 10 -- presumably the number
    # of results the site shows per page.
    page_size = 10
    link_pattern = '<h3 class="title"><a href="([^"]*)" target="_blank">.*?</a></h3>'
    url0 = "http://s.yingjiesheng.com/result.jsp?keyword=%s&period=3&sort=&jobtype=1" % '+'.join(keywords)

    page = urllib2.urlopen(url0 + "&start=0").read()
    match = re.search("共找到(.*)条记录", page)
    if not match:
        return None

    results = re.findall(link_pattern, page)
    total = int(match.group(1))
    # The remaining pages start at offsets 10, 20, ... strictly below the
    # record count. (Dividing the count by 10 instead requested one extra,
    # empty page whenever the count was an exact multiple of 10.)
    for start in range(page_size, total, page_size):
        npage = urllib2.urlopen("%s&start=%d" % (url0, start)).read()
        results.extend(re.findall(link_pattern, npage))
    return results
def getEmailAddress(url, savepath):
    '''Fetch *url* and return the first e-mail address found in the page.

    When an address is found, the raw page is also saved as
    <savepath>/<yy-mm-dd>/<last path segment of url>.

    url: address of the page to fetch.
    savepath: root folder for the saved pages; created when missing.

    Returns the address exactly as read from the page (a byte string),
    or None when the page contains no address; nothing is saved then.

    Raises whatever urllib2.urlopen raises when the page cannot be
    fetched.
    '''
    page = urllib2.urlopen(url).read()
    # Byte pattern: the page is searched as the raw bytes read() returned.
    match = re.search(br"(\w+(?:[-+.]\w+)*@\w+(?:[-.]\w+)*\.\w+(?:[-.]\w+)*)", page)
    if not match:
        return None

    # Save the page in a folder named after today's date.
    savedir = '%s/%s' % (savepath, time.strftime("%y-%m-%d"))
    # makedirs rather than mkdir: savepath itself may not exist yet.
    if not os.path.isdir(savedir):
        os.makedirs(savedir)
    # A URL ending in "/" has an empty last segment; fall back to a fixed
    # name instead of trying to open the folder itself for writing.
    filename = url.split("/")[-1] or 'index.html'
    # Binary mode stores the bytes exactly as they were downloaded.
    with open("%s/%s" % (savedir, filename), 'wb') as saved:
        saved.write(page)
    return match.group(1)
if __name__ == '__main__':
    # Script entry point: run every keyword search, look up a contact
    # address on each job page not handled before, then mail the resume
    # once to every address not mailed before.

    # Both shelve databases live under savepath; make sure it exists so
    # that a first run does not fail in shelve.open().
    if not os.path.isdir(savepath):
        os.makedirs(savepath)

    # Collect the result links of all searches, without duplicates.
    # TODO: the searches run one after another; running them in parallel
    # was drafted in the original but never enabled.
    joblist = []
    for item in searches:
        links = getLatestJobs(item)
        if links is not None:
            # Reuse the links just fetched. Calling getLatestJobs() a
            # second time doubled the requests and could return None.
            joblist += links
    joblist = list(set(joblist))

    # submitted: job URL -> contact address recorded by earlier runs.
    submitted = shelve.open("%s/submitted" % savepath)
    emails = []
    try:
        for url in joblist:
            if url in submitted:
                continue
            e = getEmailAddress(url, savepath)
            # NOTE(review): indentation was lost in the pasted source; it
            # is assumed that only URLs yielding an address are recorded,
            # so pages without one are looked at again on the next run.
            if e:
                emails.append(e)
                submitted[url] = e
    finally:
        # Close even when a fetch fails so recorded entries are flushed.
        submitted.close()
    emails = list(set(emails))

    sender = "Le Zuo (Roy) <lzuo@graduate.hku.hk>"
    attachment = "/home/roylez/workspace/career/doc/resume.pdf"
    subject = "应聘"
    mutt = "mutt -s'%s' -e'set from=\"%s\"' -a'%s' %s <$HOME/doc/letter.txt"

    # subemails: addresses already mailed (the stored value is unused).
    subemails = shelve.open("%s/emails" % savepath)
    try:
        for e in emails:
            # Send the resume with mutt: the mail body is the letter
            # template file and the resume is attached automatically.
            if e in subemails:
                continue
            print("Submitting to %s ..." % e)
            # The command goes through the shell. The address comes from
            # getEmailAddress(), whose pattern only admits word
            # characters and "-", "+", ".", "@".
            os.system(mutt % (subject, sender, attachment, e))
            subemails[e] = ''
    finally:
        # The original never closed this shelf, so the record of mailed
        # addresses might not reach the disk.
        subemails.close()
advertising
Update the Post
Either update this post and resubmit it with changes, or make a new post.
You may also comment on this post.
Please note that information posted here will expire by default in one month. If you do not want it to expire, please set the expiry time above. If it is set to expire, web search engines will not be allowed to index it prior to it expiring. Items that are not marked to expire will be indexable by search engines. Be careful with your passwords. All illegal activities will be reported and any information will be handed over to the authorities, so be good.