Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
251 views
in Technique[技术] by (71.8m points)

node.js - Puppeteer & Node-CRON: hangs after first execution

I'm attempting to make a full stack site with a scraper that runs on a schedule, storing an object in a MongoDB collection periodically. I'm attempting to use Puppeteer and cheerio with node-cron for the scheduled scrape/store. The code works great, but it only works once. It seems Puppeteer can't close itself once its tasks are complete, but I could be very wrong as I'm new at this.

CRON Schedule

let cron = require("node-cron");
let shell = require("shelljs");

cron.schedule("* * * * * *", () => {
    console.log("Scheduler executing...");
    if(shell.exec("node scraper.js").code !== 0) {
        console.log("Scheduler halted!");
    } 
});

Web Scraper

var { MongoClient } = require('mongodb');
var uri = "MONGODB_URI";
const client = new MongoClient(uri, {useNewUrlParser: true, useUnifiedTopology: true});
const puppeteer = require('puppeteer');
const $ = require('cheerio');
const url = 'SCRAPE_TARGET_URL';

let myObj = {
    date: Date(),
    one: '',
    one-secondary: '',
    two: '',
    two-secondary: '',
    three: '',
    three-secondary: ''
}

puppeteer
  .launch()
  .then(function(browser) {
    return browser.newPage();
  })
  .then(function(page) {
    return page.goto(url).then(function() {
      return page.content();
    });
  })
  .then(function(html) {
    $('#target-one', html).each(function() {
        console.log($(this).text());
        return myObj.one = $(this).text();
    });
    $('#target-one-secondary', html).each(function() {
        console.log($(this).text());
        return myObj.oneSecondary = $(this).text();
      });
    $('#target-two', html).each(function() {
        console.log($(this).text());
        return myObj.two = $(this).text();
      });
    $('#target-two-secondary', html).each(function() {
        console.log($(this).text());
        return myObj.twoSecondary = $(this).text();
    });
    $('#target-three', html).each(function() {
        console.log($(this).text());
        return myObj.three = $(this).text();
    });
    $('#target-three-secondary', html).each(function() {
        console.log($(this).text());
        return myObj.threeSecondary = $(this).text();
    });
    return console.log(myObj);
  })
  .then(async function() {
      await client.connect();
    
      const database = client.db("MY_DB");
      const collection = database.collection("MY_COLLECTION");
    
      const doc = myObj;
      const result = await collection.insertOne(doc);
    
      console.log(
        `${result.insertedCount} documents were inserted with the _id: ${result.insertedId}`,
      );
      await client.close();
    }
  );
question from:https://stackoverflow.com/questions/65835323/puppeteer-node-cron-hangs-after-first-execution

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Answer

0 votes
by (71.8m points)
Waitting for answers

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...