/** Scraper configuration: the URL of the page to scrape. */
export const config = {
  url: "https://news.ycombinator.com/",
};
/**
 * Extracts the page title and the list of story links from the scraped page.
 *
 * @param {object} context - Scrape context passed in by the caller.
 * @param {object} context.doc - Document wrapper exposing `find(selector)`.
 * @param {Function} [context.absoluteURL] - Optional resolver that turns a
 *   relative href into an absolute URL. When it is not supplied, hrefs are
 *   returned exactly as found in the markup.
 * @returns {{title: string, posts: Array<{title: string, url: string}>}}
 *   The page title and one `{ title, url }` entry per `.athing` element.
 */
export default function scrapeHackerNews({ doc, absoluteURL }) {
  // Hrefs may be relative (e.g. "item?id=1"); resolve them when a resolver is
  // available so every emitted url is usable on its own. A missing href is
  // passed through untouched instead of being handed to the resolver.
  const resolveHref = (href) =>
    typeof href === "string" && typeof absoluteURL === "function"
      ? absoluteURL(href)
      : href;

  return {
    title: doc.find("title").text(),
    posts: doc.find(".athing").map((post) => {
      const link = post.find(".titleline > a");
      return {
        title: link.text(),
        url: resolveHref(link.attr("href")),
      };
    }),
  };
}
> flyscrape run hackernews.js
[
{
"url": "https://news.ycombinator.com/",
"data": {
"title": "Hacker News",
"posts": [
{
"title": "Flyscrape - A standalone and scriptable web scraper",
"url": "https://flyscrape.com/"
},
...
]
}
}
]
brew install flyscrape
Flyscrape comes as a single binary executable. No need to fill up your disk with npm packages.
Use JavaScript to write your data extraction logic with a familiar jQuery-like API.
Take control over the scraping behavior by configuring caching, domain whitelisting, rate limiting, proxies and more.