Add caching to the code to make it faster.

yuanhau 2025-05-20 20:14:29 +08:00
parent 2d8628d0da
commit bc9a63f6ab
3 changed files with 101 additions and 15 deletions
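The change is the same pattern in each handler: a module-level in-memory map of `{ data, timestamp }` entries, a TTL check on read, and a stale-entry fallback when the upstream fetch fails. A minimal sketch of that pattern, with illustrative names (`Entry`, `TTL_MS`, `getWithCache`, and `fetchFresh` are not the identifiers in the diff below):

```ts
// Sketch of the caching pattern this commit introduces (illustrative names).
interface Entry<T> {
  data: T;
  timestamp: number;
}

const TTL_MS = 1000 * 60 * 60; // 1 hour, same as CACHE_DURATION in the diff
const cache: Record<string, Entry<string[]>> = {};

async function getWithCache(
  key: string,
  fetchFresh: (key: string) => Promise<string[]>,
): Promise<string[]> {
  const hit = cache[key];
  // Fresh hit: serve straight from memory.
  if (hit && Date.now() - hit.timestamp < TTL_MS) {
    return hit.data;
  }
  try {
    const data = await fetchFresh(key);
    cache[key] = { data, timestamp: Date.now() };
    return data;
  } catch (e) {
    // Upstream failed: prefer a stale entry over an error.
    if (hit) return hit.data;
    throw e;
  }
}
```

Because the cache lives in process memory, every instance or cold start begins empty, which is what the README warning below is about.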

@@ -4,6 +4,10 @@
App Design: [PDF Document](/design.pdf)
## Before deploying, please know this:
This code is absolutely NOT designed to be spun up on Vercel or Netlify: the scraping system now lives inside the main website code, and the entire "caching feature" is held in memory, so please don't use those platforms. On Zeabur your cost might be expensive; idk, I haven't tried it yet. The web URL https://news.yuanhau.com is hosted on my own infra, and you should do the same. Please get a server off of Yahoo拍賣 (Yahoo Auctions) or 蝦皮 (Shopee) to do so.
## Why?
We'll use this news article as an example:

@@ -1,5 +1,17 @@
// Check /about/scraping_line_today_home.md for more info or https://news.yuanhau.com/datainfo/linetodayjsondata.json
interface CacheItem {
  data: string[];
  timestamp: number;
}
// In-memory cache of fetched article IDs, keyed by tab type.
const cache: Record<string, CacheItem> = {};
const CACHE_DURATION = 1000 * 60 * 60; // 1 hour
async function getLineTodayData(type: string) {
  // Serve from cache while the entry is still fresh.
  if (cache[type] && Date.now() - cache[type].timestamp < CACHE_DURATION) {
    console.log("Serving from cache for type:", type);
    return cache[type].data;
  }
  try {
    const buildUrl = `https://today.line.me/_next/data/v1/tw/v3/tab/${type}.json?tabs=${type}`;
    const req = await fetch(buildUrl, {
@@ -21,17 +33,42 @@ async function getLineTodayData(type: string) {
            req3.push(listing.id);
          }
        });
      } else if (listings && listings.id) {
        req3.push(listings.id);
      }
    });
    cache[type] = {
      data: req3,
      timestamp: Date.now(),
    };
    return req3;
  } catch (e) {
    console.log(e);
    // Fetch failed: serve the stale cached copy if one exists.
    if (cache[type]) {
      console.log("Serving expired cache due to error");
      return cache[type].data;
    }
    return [];
  }
}
// Keep only well-formed UUID v4 strings (version nibble 4, variant 8/9/a/b).
function filterUUIDs(ids: string[]): string[] {
  const uuidPattern =
    /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
  return ids.filter((id) => uuidPattern.test(id));
}
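// GET handler: expects a ?query=<tab type> parameter and returns { data, cached }.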
export default defineEventHandler(async (event) => {
  const query = getQuery(event);
  if (!query.query) {
    return {
      error: "NOT_A_QUERY",
    };
  }
  const data = await getLineTodayData(String(query.query));
  const validUUIDs = filterUUIDs(data || []);
  return {
    data: validUUIDs,
    cached: !!cache[String(query.query)],
  };
});

@@ -1,6 +1,31 @@
import lineToday from "~/server/scrape/line_today";
import sql from "~/server/components/postgres";
import saveDataToSql from "~/server/scrape/save_scrape_data";
interface CacheItems {
  title: string;
  paragraph: string[];
  origin: string;
  author: string;
  images: string[];
  cached: boolean;
  articleId: string;
  timestamp: number;
}
const CACHE_DURATION = 1000 * 60 * 60; // 1 hour
const cache: Record<string, CacheItems> = {};
// Periodically evict expired entries so the in-memory cache doesn't grow unbounded.
function cleanupCache() {
  const now = Date.now();
  Object.keys(cache).forEach((key) => {
    if (now - cache[key].timestamp > CACHE_DURATION) {
      delete cache[key];
    }
  });
}
setInterval(cleanupCache, CACHE_DURATION);
function cleanUpSlug(orgslug: string) {
  let slug = orgslug.trim();
@@ -14,16 +39,36 @@ function cleanUpSlug(orgslug: string) {
export default defineEventHandler(async (event) => {
  const slug = getRouterParam(event, "slug");
  const cleanSlug = cleanUpSlug(slug);
  // Serve a fresh cached copy when we have one.
  if (
    cache[cleanSlug] &&
    Date.now() - cache[cleanSlug].timestamp < CACHE_DURATION
  ) {
    return {
      ...cache[cleanSlug],
      cached: true,
    };
  }
  try {
    const data = await lineToday(cleanSlug);
    //saveDataToSql(data, slug);
    cache[cleanSlug] = {
      ...data,
      timestamp: Date.now(),
    };
    return {
      ...data,
      cached: false,
    };
  } catch (e) {
    // Scrape failed: fall back to a stale cached copy before erroring out.
    if (cache[cleanSlug]) {
      return {
        ...cache[cleanSlug],
        cached: true,
      };
    }
    throw createError({
      statusCode: 500,
      message: "SERVER_SIDE_ERROR",
    });
  }
});