CSDN博客

img invalid

Proxy4Free代理服务器转换为Maxthon(MYIE2)配置

发表于2004/10/18 9:58:00  7682人阅读

分类: Python

这是我的第一篇blog,希望对大家有用


为了方便更新Maxthon代理服务器,写了个python脚本。
处理proxy4free得到的代理服务器文件,验证代理服务器,
按照响应速度排序,生成maxthon配置格式输出。
下面为文件:proxy2maxthon.py


'''
Take the proxy listing from http://www.proxy4free.com/page1.html
and emit it as Maxthon (MyIE2) proxy configuration entries.

Usage: copy and paste the proxy listing into a text file named
proxylist.txt, one proxy per line, in this format:
213.199.192.37 3128 transparent Poland 15.10.2004 Whois
156.110.47.251 8080 anonymous United States 16.10.2004 Whois
68.191.111.217 80 anonymous United States 16.10.2004 Whois
24.14.246.55 80 anonymous United States 16.10.2004 Whois
After running this program, paste the printed output into the proxy
section of Maxthon's configuration file (setupcenter.ini, found in the
Config directory under the Maxthon installation directory).
Note: make sure Maxthon is not running while you edit the file.
Author: invalid
E-mail: invalid@21cn.com
Date:   2004-10-16
'''
import os
import sys
import threading
import time
import urllib
import Queue

from string import strip

# (major, minor, release level, build date)
version_info = (1,2,"alpha",'20041016')

MAX_THREADS = 50                    # upper bound on concurrent tester threads
MAX_PROXYS = 30                     # keep only the MAX_PROXYS fastest proxies
HTTP_TIMEOUT = 10.0                 # max seconds to wait for the test page
TESTURL = 'http://www.google.com/'  # site fetched to test each proxy
TESTRESP = 'Google'                 # marker string expected in a good response
class Sorter:
    '''Sort a list of rows by a chosen column.

    Uses the decorate-sort-undecorate pattern: pair each row's key column
    with its position, sort the pairs, then rebuild the row list. Ties are
    broken by original position, so the sort is stable.
    '''

    def byItem(self, data, itemindex=None, inplace=1):
        '''Sort *data*, optionally by element *itemindex* of each row.

        itemindex -- column to sort on; None sorts the rows themselves.
        inplace   -- when true, *data* is rearranged in place as well.
        Returns the sorted list.
        '''
        if itemindex is None:
            # No key column: plain sort of the rows.
            result = data if inplace else data[:]
            result.sort()
            return result
        decorated = [(row[itemindex], pos) for pos, row in enumerate(data)]
        return self._helper(data, decorated, inplace)

    def _helper(self, data, aux, inplace):
        '''Sort the (key, position) pairs and undecorate back into rows.'''
        aux.sort()
        ordered = [data[pos] for key, pos in aux]
        if inplace:
            data[:] = ordered
        return ordered

    # a couple of handy synonyms
    sort = byItem
    __call__ = byItem


def TespProxyDirect(proxy):
    '''Test a proxy server synchronously; return (success flag, seconds used).

    Drawback: there is no timeout control, so a dead proxy makes the test
    very slow.
    proxy -- proxy address in "ip:port" form.
    Returns (1, elapsed) on success, (0, elapsed-or-0) on failure.
    '''
    iret = 0
    usedtime = 0
    proxies = {'http': 'http://' + proxy}
    try:
        tstart = time.time()
        filehandle = urllib.urlopen(TESTURL, proxies=proxies)
        try:
            data = filehandle.readlines()
        finally:
            # Always release the connection, even if readlines() raises.
            filehandle.close()
        usedtime = time.time() - tstart
        # The fetch counts as a success only if the page contains TESTRESP.
        for line in data:
            if line.find(TESTRESP) != -1:   # was "> 0": missed a match at column 0
                iret = 1
                break
    except IOError:
        # Network/proxy failure (urllib raises IOError): leave iret == 0.
        pass
    return (iret, usedtime)

def TestProxy(workQueue, resultQueue):
    '''Worker-thread procedure: test whether proxies work, and how fast.

    Pulls (proxy, location) pairs from workQueue; every proxy that serves
    TESTURL within HTTP_TIMEOUT is stored in resultQueue as
    (proxy, location, milliseconds). Returns when workQueue is empty.
    '''

    def SubthreadProc(url, result):
        '''Fetch TESTURL through proxy *url*; append the elapsed time in
        milliseconds to *result* on success, or -1 on failure.'''
        # Bug fix: use the url parameter instead of the closed-over loop
        # variable "proxy", which could be rebound while a timed-out
        # subthread was still running.
        proxies = {'http': 'http://' + url}
        try:
            tstart = time.time()
            filehandle = urllib.urlopen(TESTURL, proxies=proxies)
            try:
                data = filehandle.readlines()
            finally:
                filehandle.close()
            usedtime = (time.time() - tstart) * 1000
            for line in data:
                if line.find(TESTRESP) != -1:   # was "> 0": missed column 0
                    result.append(usedtime)
                    break                        # record the time only once
        except Exception:
            result.append(-1)

    while 1:
        # Continue pulling data from the work queue until it's empty.
        try:
            proxy, local = workQueue.get(0)
        except Queue.Empty:
            # Work queue is empty -- exit the thread proc.
            return

        # Do the actual fetch in a daemonized subthread so a hung request
        # cannot keep the process alive; wait at most HTTP_TIMEOUT for it.
        result = []
        subThread = threading.Thread(target=SubthreadProc, args=(proxy, result))
        subThread.setDaemon(True)
        subThread.start()
        subThread.join(HTTP_TIMEOUT)

        if not result:
            # Subthread hasn't given a result yet: treat as a timeout.
            pass
        elif result[0] == -1:
            # Subthread reported a fetch failure.
            pass
        else:
            # Subthread returned a time -- store it.
            resultQueue.put((proxy, local, result[0]))
           

def GenMaxthonCfg(proxys):
    '''生成Maxthon配置文件所需格式'''
    #index表示开始配置序号
    index = 4
    #timeout表示代理获取google的最大时间,超过这个时间的代理丢弃不要,单位:毫秒
    timeout = 10000
     #按照获取时间排序,时间短的排在前面
    sort = Sorter()
    sort(proxys,2)
    print "*****************************************************"
    for item in proxys:
        print item[0],item[1],int(item[2])
 
    print "*****************************************************"
    print "把下面输入粘贴到maxthon的setupcenter.ini配置文件的proxy节"
    count = 0
    for item in proxys:
        #maxthon的代理配置有如下选项:
        #p1=http=210.230.192.39:3128
        #ps1=3
        #pn1=japan
        if int(item[2]) > timeout:
            continue
        p = "p"+str(index)+"=http="+item[0]
        ps = "ps"+str(index)+"=3"
        pn = "pn"+str(index)+"="+item[1]+str(index)
        print p
        print ps
        print pn
       
        index += 1
        count +=1
        if count > MAX_PROXYS:
            break
    return
   
def main():
   
    #打开代理文件,开始处理
    try:
        file = open("proxylist.txt","r")
    except:
        print "open proxylist file error"
        sys.exit(1)
       
    data = file.readlines()
    file.close()
    print "proxylist file have proxy:",len(data)

    urls = []
    # Record the start time, so we can print a nice message at the end
    processStartTime = time.time()

    numThreads = min(MAX_THREADS, len(data))

    #通过验证的代理记录,单元格式为代理地址端口,名称,获取google时间
    proxys = []
    workQueue = Queue.Queue()
    for line in data:
        if line.find("China") > 0:#不用中国的代理服务器
            continue
        sects = line.split(" ")
        if len(sects) > 6:
            ip = strip(sects[0])
            port = strip(sects[1])
            local = sects[-4]
        if urls.count(ip) > 0:#代理服务器重复了
            continue
        urls.append(ip)
       
        workQueue.put((ip+":"+port,local))

    #工作线程
    workers = []
    resultQueue = Queue.Queue()

    # Create worker threads to load-balance the retrieval
    print "create threads..."
    for threadNum in range(0, numThreads):
        workers.append(threading.Thread(target=TestProxy,
                                        args=(workQueue,resultQueue)))
        workers[-1].start()

    # Wait for all the workers to finish
    print "waiting threads finish..."
    for w in workers:
        w.join()
    print "threads done."
    while not resultQueue.empty():
        proxy,local,result = resultQueue.get(0)
        proxys = proxys + [(proxy,local,int(result))]

    GenMaxthonCfg(proxys)

# Script entry point: run only when executed directly, not when imported.
if __name__ == '__main__':
    main()

阅读全文
0 0

相关文章推荐

img
取 消
img