Mirror of https://github.com/MathiasDPX/grainParisArt.git (synced 2025-05-10 07:33:05 +00:00)
Cleanup + Use API instead of scraping
Parent: ea9f4d79b0
Commit: 9deeb46e0c
27 changed files with 2026 additions and 2504 deletions
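The change replaces HTML scraping with AlloCine's JSON endpoints, which the new modules/Classes.py below wraps in Movie, Showtime and Theater classes. As a rough orientation, here is a minimal sketch of the underlying request, using only the endpoint and the results/movie/showtimes fields that appear in the committed code; the theater id value is a hypothetical placeholder, not taken from the repository.

# Minimal sketch, not the committed implementation: fetch one day of showtimes
# for a theater straight from the AlloCine JSON endpoint used by Classes.py.
from datetime import datetime

import requests

theater_id = "C0159"  # hypothetical AlloCine internal theater id
datestr = datetime.today().strftime("%Y-%m-%d")

r = requests.get(f"https://www.allocine.fr/_/showtimes/theater-{theater_id}/d-{datestr}/")
data = r.json()

for result in data.get("results", []):
    title = result["movie"]["title"]
    hours = [s["startsAt"] for s in result["showtimes"]["original"]]
    print(title, hours)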
117 modules/Classes.py (new file)
@@ -0,0 +1,117 @@
from datetime import datetime

import requests


class Movie:
    def __init__(self, data) -> None:
        self.data = data
        self.title = data["title"]
        self.id = data['internalId']
        self.runtime = data["runtime"]
        self.synopsis = data["synopsis"]
        self.genres = [genre['translate'] for genre in data["genres"]]
        try:
            self.affiche = data["poster"]["url"]
        except (KeyError, TypeError):
            # TODO: replace with a nicer 1067x1600 placeholder image
            self.affiche = "https://upload.wikimedia.org/wikipedia/commons/a/a3/Image-not-found.png"
        self.cast = []

        # Actor names
        for actor in data["cast"]["edges"]:
            if actor["node"]["actor"] is None:
                continue

            if actor["node"]["actor"]["lastName"] is None:
                actor["node"]["actor"]["lastName"] = ""

            if actor["node"]["actor"]["firstName"] is None:
                actor["node"]["actor"]["firstName"] = ""

            name = f'{actor["node"]["actor"]["firstName"]} {actor["node"]["actor"]["lastName"]}'
            name = name.lstrip()
            self.cast.append(name)

        # Director name
        if len(data["credits"]) == 0:
            self.director = "Inconnu"
        else:
            if data["credits"][0]["person"]["lastName"] is None:
                data["credits"][0]["person"]["lastName"] = ""

            if data["credits"][0]["person"]["firstName"] is None:
                data["credits"][0]["person"]["firstName"] = ""

            self.director = f'{data["credits"][0]["person"]["firstName"]} {data["credits"][0]["person"]["lastName"]}'
            self.director = self.director.lstrip()

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__} name={self.title}>"


class Showtime:
    def __init__(self, data, theater, movie: Movie) -> None:
        self.startsAt = datetime.fromisoformat(data['startsAt'])
        self.diffusionVersion = data['diffusionVersion']
        self.services = data["service"]
        self.theater: Theater = theater
        self.movie = movie

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__} name={self.movie.title} startsAt={self.startsAt}>"


class Theater:
    def __init__(self, data) -> None:
        self.name = data['name']
        self.id = data['internalId']
        self.location = data['location']

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__} name={self.name}>"

    def getShowtimes(self, date: datetime) -> list[Showtime]:
        # Query AlloCine's showtimes endpoint for this theater and date
        datestr = date.strftime("%Y-%m-%d")
        r = requests.get(f"https://www.allocine.fr/_/showtimes/theater-{self.id}/d-{datestr}/")
        if r.status_code != 200:
            return {"error": True, "message": r.status_code, "content": r.content}

        try:
            data = r.json()
        except ValueError:
            return {"error": True, "message": "Can't parse JSON", "content": r.content}

        if data['error']:
            return {"error": True, "message": "Error in request", "content": data}

        showtimes = []

        for movie in data['results']:
            inst = Movie(movie["movie"])
            movie_showtimes = []
            movie_showtimes.extend(movie["showtimes"]["dubbed"])
            movie_showtimes.extend(movie["showtimes"]["original"])
            movie_showtimes.extend(movie["showtimes"]["local"])

            for showtime_data in movie_showtimes:
                showtimes.append(Showtime(showtime_data, self, inst))

        return showtimes

    @staticmethod
    def new(query: str):
        # Look up a theater by name through AlloCine's localization endpoint
        r = requests.get(f"https://www.allocine.fr/_/localization_city/{query}")

        try:
            data = r.json()
        except ValueError:
            return {"error": True, "message": "Can't parse JSON", "content": r.content}

        if len(data["values"]["theaters"]) == 0:
            return {"error": True, "message": "Not found", "content": r.content}

        return Theater(data["values"]["theaters"][0]["node"])


if __name__ == "__main__":
    cgr = Theater.new("CGR Brest Le Celtic")
    print(f"{cgr.name} ({cgr.id})")
    print(f"{cgr.location['zip']} {cgr.location['city']}")

    showtimes = cgr.getShowtimes(datetime.today())

    print(showtimes[0])
4 binary files not shown.
200 modules/date.py (deleted)
@@ -1,200 +0,0 @@
import time
from datetime import datetime


# Convert a month number into its French name
def chiffre_intoMonth(month):
    match month:
        case 1: return 'janvier'
        case 2: return 'février'
        case 3: return 'mars'
        case 4: return 'avril'
        case 5: return 'mai'
        case 6: return 'juin'
        case 7: return 'juillet'
        case 8: return 'août'
        case 9: return 'septembre'
        case 10: return 'octobre'
        case 11: return 'novembre'
        case 12: return 'décembre'
        case _: return 'invalid month'


# Apply the day offset and translate into an abbreviated French day name
def anglais_intoJourFrancais(jour, decalage):
    if decalage == 0:
        match jour:
            case "Monday": return 'lun'
            case 'Tuesday': return 'mar'
            case 'Wednesday': return 'mer'
            case 'Thursday': return 'jeu'
            case 'Friday': return 'ven'
            case 'Saturday': return 'sam'
            case 'Sunday': return 'dim'
            case _: return 'invalid jour'
    elif decalage == 1:
        match jour:
            case "Monday": return 'mar'
            case 'Tuesday': return 'mer'
            case 'Wednesday': return 'jeu'
            case 'Thursday': return 'ven'
            case 'Friday': return 'sam'
            case 'Saturday': return 'dim'
            case 'Sunday': return 'lun'
            case _: return 'invalid jour'
    elif decalage == 2:
        match jour:
            case "Monday": return 'mer'
            case 'Tuesday': return 'jeu'
            case 'Wednesday': return 'ven'
            case 'Thursday': return 'sam'
            case 'Friday': return 'dim'
            case 'Saturday': return 'lun'
            case 'Sunday': return 'mar'
            case _: return 'invalid jour'
    elif decalage == 3:
        match jour:
            case "Monday": return 'jeu'
            case 'Tuesday': return 'ven'
            case 'Wednesday': return 'sam'
            case 'Thursday': return 'dim'
            case 'Friday': return 'lun'
            case 'Saturday': return 'mar'
            case 'Sunday': return 'mer'
            case _: return 'invalid jour'
    elif decalage == 4:
        match jour:
            case "Monday": return 'ven'
            case 'Tuesday': return 'sam'
            case 'Wednesday': return 'dim'
            case 'Thursday': return 'lun'
            case 'Friday': return 'mar'
            case 'Saturday': return 'mer'
            case 'Sunday': return 'jeu'
            case _: return 'invalid jour'
    elif decalage == 5:
        match jour:
            case "Monday": return 'sam'
            case 'Tuesday': return 'dim'
            case 'Wednesday': return 'lun'
            case 'Thursday': return 'mar'
            case 'Friday': return 'mer'
            case 'Saturday': return 'jeu'
            case 'Sunday': return 'ven'
            case _: return 'invalid jour'
    elif decalage == 6:
        match jour:
            case "Monday": return 'dim'
            case 'Tuesday': return 'lun'
            case 'Wednesday': return 'mar'
            case 'Thursday': return 'mer'
            case 'Friday': return 'jeu'
            case 'Saturday': return 'ven'
            case 'Sunday': return 'sam'
            case _: return 'invalid jour'


def testChiffreJour(chiffre, decalage):
    today = datetime.today()
    month = today.month

    if month in [1, 3, 5, 7, 8, 10, 12]:
        max_days = 31
    elif month == 2:
        if (today.year % 4 == 0 and today.year % 100 != 0) or (today.year % 400 == 0):
            max_days = 29
        else:
            max_days = 28
    else:
        max_days = 30

    if chiffre + decalage > max_days:
        return chiffre + decalage - max_days
    else:
        return chiffre + decalage


def testMoisNumero(chiffre, decalage):
    today = datetime.today()
    month = today.month

    if month in [1, 3, 5, 7, 8, 10, 12]:
        max_days = 31
    elif month == 2:
        if (today.year % 4 == 0 and today.year % 100 != 0) or (today.year % 400 == 0):
            max_days = 29
        else:
            max_days = 28
    else:
        max_days = 30

    if chiffre + decalage > max_days:
        next_month = month + 1 if month < 12 else 1
        return chiffre_intoMonth(next_month)
    else:
        return chiffre_intoMonth(month)
73 modules/firebase.py (deleted)
@@ -1,73 +0,0 @@
import firebase_admin
from firebase_admin import credentials, db
import urllib.parse

cred = credentials.Certificate('static/firebase/firebase_grainParisArt.json')
firebase_admin.initialize_app(cred, {
    'databaseURL': '###'
})

ref = db.reference('/')

film = {
    'titre': "L'I.A. du mal",
    'realisateur': 'Luca Guadagnino',
    'casting': [' Zendaya', "Josh O'Connor", 'Mike Faist'],
    'genres': ['Drame', 'Romance'],
    'duree': {'heure': 2, 'minute': 12},
    'affiche': 'https://fr.web.img2.acsta.net/c_310_420/pictures/24/01/15/10/08/2202044.jpg',
    'synopsis': '\nDurant leurs études, Patrick et Art, tombent amoureux de Tashi. À la fois amis, amants et rivaux, ils voient tous les trois leurs chemins se recroiser des années plus tard. Leur passé et leur présent s’entrechoquent et des tensions jusque-là inavouées refont surface.\n',
    'horaires': [{'cinema': 'MK2 Parnasse', 'seances': ['20:45']}]
}


def encode_node_name(name):
    replacements = {
        '.': '__dot__',
        '$': '__dollar__',
        '#': '__hash__',
        '[': '__lbrack__',
        ']': '__rbrack__',
        '/': '__slash__'
    }

    for char, replacement in replacements.items():
        name = name.replace(char, replacement)

    return name


def enregistrementFilm(film):
    cleaned_movie_name = encode_node_name(film['titre'])
    movie_ref = ref.child(cleaned_movie_name)
    movie_ref.set({
        'titre': film['titre'],
        'realisateur': film['realisateur'],
        'casting': film['casting'],
        'genres': film['genres'],
        'duree': film['duree'],
        'affiche': film['affiche'],
        'synopsis': film['synopsis']
    })
    print(f"Node '{film['titre']}' created successfully with details!")


def recupererDataFilm(nomFilm, realisateur):
    cleaned_movie_name = encode_node_name(nomFilm)
    print(cleaned_movie_name)
    movie_ref = ref.child(cleaned_movie_name)

    # Read the node's data
    movie_data = movie_ref.get()

    if movie_data:
        # Check whether the director matches
        if movie_data.get('realisateur') == realisateur:
            return movie_data
        else:
            return 0
    else:
        return 0


def supprimerTousLesFilms():
    root_ref = ref
    root_ref.delete()
    print("Tous les films ont été supprimés.")
@@ -1,153 +0,0 @@
from bs4 import BeautifulSoup
import requests
import requests_cache
from datetime import timedelta
from modules.firebase import enregistrementFilm, recupererDataFilm


requests_cache.install_cache('film_cache', expire_after=timedelta(minutes=5))


# Collect the data
def scrap_infoFilm(url, cinema):
    films = []
    response = requests.get(url)
    reponse_text = response.text
    soupReponse = BeautifulSoup(reponse_text, 'html.parser')

    # films_list = soupReponse.find('div', class_="showtimes-list-holder").find_all('div', class_="card entity-card entity-card-list movie-card-theater cf hred")
    films_list_container = soupReponse.find('div', class_="showtimes-list-holder")
    if films_list_container:
        films_list = films_list_container.find_all('div', class_="card entity-card entity-card-list movie-card-theater cf hred")
        for film in films_list:
            titre = film.find("div", class_="meta").find('h2', class_="meta-title").find("a").get_text()
            realisateur_section = film.find("div", class_="meta-body-item meta-body-direction")

            if realisateur_section:
                realisateur = realisateur_section.find('span', class_="dark-grey-link").get_text()
            else:
                realisateur = "Réalisateur non trouvé"

            dataFilm_firebase = recupererDataFilm(titre, realisateur)
            if dataFilm_firebase == 0:
                # Extract the poster image
                thumbnail_img = film.find('img', class_='thumbnail-img')
                if thumbnail_img and not thumbnail_img['src'].startswith('data:image'):
                    img_url = thumbnail_img['src']
                else:
                    urlAffiche = "https://www.allocine.fr" + film.find("div", class_="meta").find('h2', class_="meta-title").find("a")['href']
                    responseAffiche = requests.get(urlAffiche)
                    pageFilm = BeautifulSoup(responseAffiche.text, 'html.parser')
                    thumbnail_img = pageFilm.find('img', class_='thumbnail-img')
                    img_url = thumbnail_img['src'] if thumbnail_img and not thumbnail_img['src'].startswith('data:image') else 'Image de la vignette non trouvée'

                synopsis = film.find('div', class_="synopsis").find('div', class_="content-txt").get_text() if film.find('div', class_="synopsis") else "synopsis non trouvé"
                acteur_container = film.find("div", class_="meta-body-item meta-body-actor")
                acteurs = [acteur.get_text() for acteur in acteur_container.find_all("span", class_="dark-grey-link")] if acteur_container else ["acteurs non trouvés"]

                horaire_sections = film.find_all("div", class_="showtimes-hour-block")
                horaires = [horaire_section.find('span', class_="showtimes-hour-item-value").get_text() for horaire_section in horaire_sections if horaire_section.find('span', class_="showtimes-hour-item-value")] or ["Horaire non trouvé"]

                genre_container = film.find("div", class_="meta-body-item meta-body-info")
                genres = [span.get_text().strip() for span in genre_container.find_all("span") if 'class' in span.attrs and not span.attrs['class'][0].startswith('spacer') and 'nationality' not in span.attrs['class']] if genre_container else ["Genre non trouvé"]
                if genres: genres.pop(0)

                # Fetch the film's runtime from TMDB
                url = "https://api.themoviedb.org/3/search/movie?query=" + titre

                headers = {
                    "accept": "application/json",
                    "Authorization": "###"
                }

                response = requests.get(url, headers=headers)

                if response.status_code == 200:
                    data = response.json()
                    if data['results']:
                        film_id = data['results'][0]['id']
                        url = "https://api.themoviedb.org/3/movie/" + str(film_id)
                        response = requests.get(url, headers=headers)

                        data = response.json()
                        duree_film = data['runtime']

                        heure = duree_film // 60
                        minute = duree_film % 60
                    else:
                        heure = 0
                        minute = 0
                else:
                    heure = 0
                    minute = 0

                film_data = {
                    "titre": titre,
                    "realisateur": realisateur,
                    "casting": acteurs,
                    "genres": genres,
                    "duree": {"heure": heure, "minute": minute},
                    "affiche": img_url,
                    "synopsis": synopsis,
                    "horaires": [
                        {
                            "cinema": cinema,
                            "seances": horaires
                        }
                    ]
                }
                enregistrementFilm(film_data)
                print(f"{film_data['titre']} : enregistré dans la db")
            else:
                horaire_sections = film.find_all("div", class_="showtimes-hour-block")
                horaires = [horaire_section.find('span', class_="showtimes-hour-item-value").get_text() for horaire_section in horaire_sections if horaire_section.find('span', class_="showtimes-hour-item-value")] or ["Horaire non trouvé"]

                film_data = {
                    "titre": dataFilm_firebase['titre'],
                    "realisateur": dataFilm_firebase['realisateur'],
                    "casting": dataFilm_firebase['casting'],
                    'genres': ['Drame', 'Romance'],
                    "duree": dataFilm_firebase['duree'],
                    "affiche": dataFilm_firebase['affiche'],
                    "synopsis": dataFilm_firebase['synopsis'],
                    "horaires": [
                        {
                            "cinema": cinema,
                            "seances": horaires
                        }
                    ]
                }
                print(f"{film_data['titre']} : récupéré dans la db")

            # Add the film if it is not already present
            existing_film = next((f for f in films if f["titre"] == titre), None)
            if existing_film:
                existing_film["horaires"].append({
                    "cinema": cinema,
                    "seances": horaires
                })
            else:
                films.append(film_data)
    else:
        print(f"L'élément 'showtimes-list-holder' n'a pas été trouvé pour l'URL {url}")
        films_list = []
    return films


def get_data(cinemas):
    films = []
    for cinema in cinemas:
        result = scrap_infoFilm(cinema["url"], cinema["salle"])
        films.extend(result)
    return films


def cleanFilms(films):
    filmsClean = []
    for film in films:
        existing_film = next((f for f in filmsClean if f["titre"] == film["titre"]), None)
        if existing_film:
            existing_film["horaires"].append({
                "cinema": film["horaires"][0]["cinema"],
                "seances": film["horaires"][0]["seances"]
            })
        else:
            filmsClean.append(film)
    return filmsClean
@@ -1,40 +0,0 @@
import time
from datetime import datetime
import datetime


def decalageDate(baseURL, decalage):
    today = datetime.datetime.today()

    day = today.day
    year = today.year
    month = today.month

    if month in [1, 3, 5, 7, 8, 10, 12]:
        max_days = 31
    elif month == 2:
        if (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0):
            max_days = 29
        else:
            max_days = 28
    else:
        max_days = 30

    if day + decalage > max_days:
        if month + 1 > 12:
            year = year + 1
            month = 1
            day = day + decalage - max_days
            url = baseURL + str(year) + "-" + str(month).zfill(2) + "-" + str(day).zfill(2)
            print(url)
            return url
        else:
            day = day + decalage - max_days
            url = baseURL + str(year) + "-" + str(month).zfill(2) + "-" + str(day).zfill(2)
            print(url)
            return url
    else:
        day = day + decalage
        url = baseURL + str(year) + "-" + str(month).zfill(2) + "-" + str(day).zfill(2)
        print(url)
        return url