This website requires JavaScript.

Node.js 异步接口爬虫

by  莫名丶恕  
const jsonfile = require('jsonfile');
// Fixed module name: the utility library is published as "lodash"; "loadsh" was a misspelling.
const {clone,pick,concat,zipObjectDeep,merge,flatten} = require("lodash");
const HttpsProxyAgent = require('https-proxy-agent');
// Defaults shared by every request the crawler sends.
const axiosDefaultConfig = {
  // Placeholder value — replace with the real API origin.
  baseURL: 'xxx',
  // Turn off axios' own proxy option; proxying is delegated to the agent below.
  proxy: false,
  // Send HTTPS traffic through a proxy so the crawler's own IP does not get banned.
  // Placeholder proxy address — replace with a real host:port.
  httpsAgent: new HttpsProxyAgent('http://xxx:xxx')
};
// Pre-configured axios instance used by the request helpers in this file.
const axios = require('axios').create(axiosDefaultConfig);
/**
 * Posts every string in `arr` to `/api` on a staggered schedule and collects
 * the outcomes.
 *
 * Schedule: request `index` fires after
 * `initTime + (floor(index / 5) + 1) * 30s + index * 2s`, i.e. requests are
 * 2 s apart and an extra 30 s pause is inserted before every group of five
 * (including the first group).
 * NOTE(review): the original comment spoke of pausing "every 6 requests" while
 * the code grouped by 5; the coded schedule is kept unchanged — confirm intent.
 *
 * Uses the global `FormData` — assumes a runtime that provides it
 * (NOTE(review): confirm the targeted Node.js version).
 *
 * @param {string[]} arr - Payloads; each one is sent as the `data` form field.
 * @param {string} device - Result key. Used verbatim when it already contains
 *   a digit, otherwise prefixed with the 1-based index (`"<n>.<device>"`).
 * @param {number} [initTime=3000] - Base delay in milliseconds.
 * @returns {Promise<Array>} `[results, errors, successes]` where
 *   `results[i]` is `{ [key]: response.data.data[0] }` (value is `undefined`
 *   for failed requests), `errors` holds `[payload, key]` pairs to retry, and
 *   `successes` holds `[payload, value]` pairs for responses whose `info` is
 *   `'ok'`. The third element was previously collected but never returned,
 *   although callers read it.
 */
const getListData = async (arr, device, initTime = 3000) => {
  const REQUEST_GAP_MS = 2000;
  const GROUP_SIZE = 5;
  const GROUP_PAUSE_MS = 30 * 1000;

  // Successful [payload, value] pairs and failed [payload, key] pairs.
  const successStrList = [];
  const errStrList = [];

  const deviceHasDigits = /[0-9]+/.test(device);
  const keyFor = (index) => (deviceHasDigits ? device : `${index + 1}.${device}`);
  const sleep = (ms) => new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

  const responses = await Promise.all(arr.map(async (str, index) => {
    const bodyFormData = new FormData();
    bodyFormData.append('data', str);
    const realDevice = keyFor(index);
    // Computed locally instead of accumulating into the `initTime` parameter.
    const pauseCount = Math.floor(index / GROUP_SIZE) + 1;
    const delayTime = initTime + pauseCount * GROUP_PAUSE_MS + index * REQUEST_GAP_MS;
    await sleep(delayTime);
    try {
      const response = await axios({
        url: '/api',
        method: 'POST',
        data: bodyFormData,
        // Header information (none set here).
        headers: {},
      });
      if (response?.data?.info === 'ok') {
        successStrList.push([str, response?.data?.data?.[0]]);
      } else {
        errStrList.push([str, realDevice]);
      }
      return response;
    } catch {
      // Deliberate best-effort: any failure is recorded in errStrList so the
      // caller can retry it, and the batch as a whole still resolves.
      errStrList.push([str, realDevice]);
      return {};
    }
  }));

  const resultList = responses.map((res, index) => ({
    [keyFor(index)]: res?.data?.data?.[0],
  }));
  return [resultList, errStrList, successStrList];
};
/**
 * Re-requests previously failed items until a round finishes with no failures.
 *
 * Every pending item is sent through `getListData` on its own, with the start
 * of each request offset by `3000 * index` ms within the round.
 * NOTE(review): there is no retry limit — an item the API never accepts keeps
 * this looping indefinitely (same as the original recursion).
 *
 * @param {Array<[string, string]>} arr - `[payload, deviceKey]` pairs to retry.
 * @param {Object} [data={}] - Already collected results to merge into; it is
 *   copied, not mutated.
 * @returns {Promise<Array>} `[mergedData, [successPairs]]` where `mergedData`
 *   is the nested object built from all rounds and `successPairs` is the flat
 *   list of `[payload, value]` pairs from every round.
 */
const getOkList = async (arr, data = {}) => {
  // Builds a nested object from single-key entries such as
  // { "1.sport.ios": value }. Defined locally because the handler's `getObj`
  // helper is not visible at module scope.
  const toNestedObject = (entries) => zipObjectDeep(
    entries.map((entry) => Object.keys(entry)[0]),
    entries.map((entry) => Object.values(entry)[0]),
  );

  let merged = merge({}, data);
  const successPairs = [];
  let pending = arr;

  while (pending.length > 0) {
    const rounds = await Promise.all(
      pending.map((item, index) => getListData([item[0]], item[1], 3000 * index)),
    );
    merged = merge(merged, ...rounds.map((round) => toNestedObject(round[0])));
    // Keep successes from every round; they used to be lost on each retry.
    // `?? []` tolerates a getListData that returns no third element.
    successPairs.push(...rounds.flatMap((round) => round[2] ?? []));
    // Whatever failed in this round becomes the next round's input.
    pending = rounds.flatMap((round) => round[1]).filter(Boolean);
  }

  return [merged, [successPairs]];
};
module.exports = {
  '/await': async (req, res)=>{
    const length = 10
    const getObj = (data)=>{
      return zipObjectDeep(
        data.map((item)=>Object.keys(item)[0]),
        data.map((item)=>Object.values(item)[0])
      )
    }
    const sportIos = await getListData([...Array(length).keys()].map(i=>`${i+1}.json`),"sport.ios",0)
    const errList = [...sportIos[1]]
    //{0:{"sport":{"ios":""}}}
    const sportIosObj = getObj(sportIos[0])
    const errInfo= await getOkList(errList)
    const successList = [...sportIos[2],...errInfo[1]]
    const data = merge(sportIosObj,errInfo[0])
    jsonfile.writeFileSync(`data/${platform}.json`,successList)
    return  res.send({code:0,data ,errList,msg:'获取成功'})
  }
}

参考资料:
axios 设置代理 IP

相关推荐