Push public

This commit is contained in:
Solène 2024-09-16 13:29:02 +02:00
commit ea9f4d79b0
35 changed files with 3293 additions and 0 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

200
modules/date.py Normal file
View file

@ -0,0 +1,200 @@
import time
from datetime import datetime
# Convert a month number (1-12) into its French month name.
def chiffre_intoMonth(month):
    """Return the lowercase French name of month number *month*.

    Args:
        month: Month number, 1 (janvier) through 12 (décembre).

    Returns:
        The French month name, or the string 'invalid month' when *month*
        is not one of 1..12.
    """
    noms_des_mois = {
        1: 'janvier',
        2: 'février',
        3: 'mars',
        4: 'avril',
        5: 'mai',
        6: 'juin',
        7: 'juillet',
        8: 'août',
        9: 'septembre',
        10: 'octobre',
        11: 'novembre',
        12: 'décembre',
    }
    try:
        return noms_des_mois.get(month, 'invalid month')
    except TypeError:
        # Unhashable input (e.g. a list) cannot be a dict key; it is simply
        # not a valid month.
        return 'invalid month'
# Shift an English weekday name by a number of days and translate the
# resulting weekday into its French three-letter abbreviation.
def anglais_intoJourFrancais(jour, decalage):
    """Return the French abbreviation of the weekday *decalage* days after *jour*.

    Args:
        jour: English weekday name, capitalised ('Monday' ... 'Sunday').
        decalage: Integer number of days to shift by. Any integer is
            accepted; the shift wraps around the week (modulo 7), so 7
            behaves like 0 and -1 means "the day before".

    Returns:
        One of 'lun', 'mar', 'mer', 'jeu', 'ven', 'sam', 'dim', or the string
        'invalid jour' when *jour* is not a recognised English weekday name.
    """
    jours_anglais = (
        'Monday', 'Tuesday', 'Wednesday', 'Thursday',
        'Friday', 'Saturday', 'Sunday',
    )
    jours_francais = ('lun', 'mar', 'mer', 'jeu', 'ven', 'sam', 'dim')

    if jour not in jours_anglais:
        return 'invalid jour'

    # Both tuples are in the same Monday-first order, so shifting the index
    # modulo 7 replaces the seven hand-written lookup tables.
    position = (jours_anglais.index(jour) + decalage) % len(jours_francais)
    return jours_francais[position]
def testChiffreJour(chiffre, decalage):
today = datetime.today()
month = today.month
if month in [1, 3, 5, 7, 8, 10, 12]:
max_days = 31
elif month == 2:
if (today.year % 4 == 0 and today.year % 100 != 0) or (today.year % 400 == 0):
max_days = 29
else:
max_days = 28
else:
max_days = 30
if chiffre + decalage > max_days:
return chiffre + decalage - max_days
else:
return chiffre + decalage
def testMoisNumero(chiffre, decalage):
    """Return the French name of the month in which ``chiffre + decalage`` falls.

    The current month (taken from the system clock) is the reference: when
    ``chiffre + decalage`` runs past its last day, the following month's name
    is returned (December rolls over to January); otherwise the current
    month's name is returned.

    Args:
        chiffre: Day of the month to start from.
        decalage: Number of days to add.

    Returns:
        The month name as produced by ``chiffre_intoMonth``. Only a single
        month rollover is considered.
    """
    # Local import so this function is self-contained; calendar.monthrange
    # handles month lengths and leap years for us.
    import calendar

    today = datetime.today()
    month = today.month
    max_days = calendar.monthrange(today.year, month)[1]

    if chiffre + decalage > max_days:
        # 12 % 12 + 1 == 1, so December wraps to January.
        return chiffre_intoMonth(month % 12 + 1)
    return chiffre_intoMonth(month)

73
modules/firebase.py Normal file
View file

@ -0,0 +1,73 @@
import firebase_admin
from firebase_admin import credentials, db
import urllib.parse
# Module-level Firebase setup: everything below runs once, at import time.
# The service-account key is loaded from a path relative to the current
# working directory.
cred = credentials.Certificate('static/firebase/firebase_grainParisArt.json')
# NOTE(review): '###' looks like a redacted placeholder for the database URL;
# confirm the real value is supplied before running.
firebase_admin.initialize_app(cred, {
'databaseURL': '###'
})
# Reference to the database root; the functions below create, read and delete
# one child node per film under it.
ref = db.reference('/')
# Sample film record. Its keys match the ones read by enregistrementFilm()
# ('horaires' is present here but is not written by that function).
# NOTE(review): not referenced by any code visible in this file; presumably
# kept as a manual-testing fixture. The title and the other fields look like
# they describe different films - confirm before relying on it.
film = {
'titre': "L'I.A. du mal",
'realisateur': 'Luca Guadagnino',
'casting': [' Zendaya', "Josh O'Connor", 'Mike Faist'],
'genres': ['Drame', 'Romance'],
'duree': {'heure': 2, 'minute': 12},
'affiche': 'https://fr.web.img2.acsta.net/c_310_420/pictures/24/01/15/10/08/2202044.jpg',
'synopsis': '\nDurant leurs études, Patrick et Art, tombent amoureux de Tashi. À la fois amis, amants et rivaux, ils voient tous les trois leurs chemins se recroiser des années plus tard. Leur passé et leur présent sentrechoquent et des tensions jusque-là inavouées refont surface.\n',
'horaires': [{'cinema': 'MK2 Parnasse', 'seances': ['20:45']}]
}
def encode_node_name(name):
    """Return *name* with the characters . $ # [ ] / spelled out as tokens.

    Each of those characters is replaced by a ``__word__`` token (for example
    '.' becomes '__dot__') so the result can be used as a database node name
    (presumably because the database rejects these characters in keys).
    All other characters are left untouched.
    """
    table_de_substitution = str.maketrans({
        '.': '__dot__',
        '$': '__dollar__',
        '#': '__hash__',
        '[': '__lbrack__',
        ']': '__rbrack__',
        '/': '__slash__',
    })
    return name.translate(table_de_substitution)
def enregistrementFilm(film):
    """Store one film's details under a node named after its encoded title.

    Args:
        film: Mapping that must contain the keys 'titre', 'realisateur',
            'casting', 'genres', 'duree', 'affiche' and 'synopsis'. Any other
            key (e.g. 'horaires') is not written.

    Raises:
        KeyError: If one of the required keys is missing from *film*.
    """
    champs_enregistres = (
        'titre', 'realisateur', 'casting', 'genres',
        'duree', 'affiche', 'synopsis',
    )
    noeud_film = ref.child(encode_node_name(film['titre']))
    noeud_film.set({champ: film[champ] for champ in champs_enregistres})
    print(f"Node '{film['titre']}' created successfully with details!")
def recupererDataFilm(nomFilm, realisateur):
    """Fetch the stored record for a film, checking that the director matches.

    Args:
        nomFilm: Film title; it is encoded with ``encode_node_name`` to find
            the node.
        realisateur: Director name expected in the stored record.

    Returns:
        The stored record when the node holds data and its 'realisateur'
        field equals *realisateur*; otherwise the integer 0.
    """
    cle_noeud = encode_node_name(nomFilm)
    print(cle_noeud)
    fiche = ref.child(cle_noeud).get()

    # Missing node, or a same-titled film by another director: report "not found".
    if not fiche or fiche.get('realisateur') != realisateur:
        return 0
    return fiche
def supprimerTousLesFilms():
    """Delete everything under the database root reference, then log it.

    Because ``ref`` points at '/', this removes every stored film node.
    """
    ref.delete()
    print("Tous les films ont été supprimés.")

153
modules/scraping.py Normal file
View file

@ -0,0 +1,153 @@
from bs4 import BeautifulSoup
import requests
import requests_cache
from datetime import timedelta
from modules.firebase import enregistrementFilm, recupererDataFilm
# Install a requests_cache cache named 'film_cache' whose entries expire after
# five minutes. This runs at import time; install_cache is expected to apply
# to every `requests` call made afterwards (see the requests_cache docs).
requests_cache.install_cache('film_cache', expire_after=timedelta(minutes=5))
# Collect film data for one cinema from its showtimes page.
def _extraire_horaires(film):
    """Return the showtime strings of one film card, or a placeholder list."""
    horaires = []
    for bloc in film.find_all("div", class_="showtimes-hour-block"):
        valeur = bloc.find('span', class_="showtimes-hour-item-value")
        if valeur is not None:
            horaires.append(valeur.get_text())
    return horaires or ["Horaire non trouvé"]


def _src_reel(img):
    """Return the 'src' of an <img> tag, or None if absent or an inline data: placeholder."""
    if img is None:
        return None
    src = img.get('src')
    if src is None or src.startswith('data:image'):
        return None
    return src


def _extraire_affiche(film, lien_titre):
    """Return the poster URL of a film card, falling back to the film's own page."""
    src = _src_reel(film.find('img', class_='thumbnail-img'))
    if src is not None:
        return src
    # The card only carries a placeholder image: look on the film's page.
    reponse = requests.get("https://www.allocine.fr" + lien_titre['href'])
    page_film = BeautifulSoup(reponse.text, 'html.parser')
    src = _src_reel(page_film.find('img', class_='thumbnail-img'))
    return src if src is not None else 'Image de la vignette non trouvée'


def _extraire_genres(film):
    """Return the genre labels of a film card, or a placeholder list."""
    conteneur = film.find("div", class_="meta-body-item meta-body-info")
    if conteneur is None:
        return ["Genre non trouvé"]
    libelles = [
        span.get_text().strip()
        for span in conteneur.find_all("span")
        if 'class' in span.attrs
        and not span.attrs['class'][0].startswith('spacer')
        and 'nationality' not in span.attrs['class']
    ]
    # The first classed span is dropped, as the original code did
    # (presumably it is not a genre - confirm against the page markup).
    return libelles[1:] or ["Genre non trouvé"]


def _duree_tmdb(titre):
    """Look up a film's runtime on TMDB and return it as (hours, minutes).

    Returns (0, 0) when the search fails, finds nothing, or the runtime is
    missing.
    """
    headers = {
        "accept": "application/json",
        "Authorization": "###"
    }
    # `params` lets requests URL-encode the title (spaces, '&', '#', accents).
    recherche = requests.get(
        "https://api.themoviedb.org/3/search/movie",
        params={"query": titre},
        headers=headers,
    )
    if recherche.status_code != 200:
        return 0, 0
    resultats = recherche.json().get('results')
    if not resultats:
        return 0, 0
    detail = requests.get(
        "https://api.themoviedb.org/3/movie/" + str(resultats[0]['id']),
        headers=headers,
    )
    if detail.status_code != 200:
        return 0, 0
    # 'runtime' may be absent or null; treat both as unknown.
    return divmod(detail.json().get('runtime') or 0, 60)


def scrap_infoFilm(url, cinema):
    """Scrape one cinema's showtimes page and return its films.

    For each film card on the page, the stored record is looked up with
    ``recupererDataFilm``. When none exists, the details are scraped (poster,
    synopsis, cast, genres), the runtime is fetched from TMDB, and the result
    is saved with ``enregistrementFilm``. In both cases today's showtimes are
    read from the page.

    Args:
        url: URL of the cinema's showtimes page.
        cinema: Display name of the cinema, copied into each 'horaires' entry.

    Returns:
        A list of film dicts with the keys 'titre', 'realisateur', 'casting',
        'genres', 'duree', 'affiche', 'synopsis' and 'horaires' (a list of
        ``{"cinema": ..., "seances": [...]}``). The list is empty when the
        showtimes container is not found on the page.
    """
    films = []
    page = BeautifulSoup(requests.get(url).text, 'html.parser')
    conteneur = page.find('div', class_="showtimes-list-holder")
    if not conteneur:
        print(f"L'élément 'showtimes-list-holder' n'a pas été trouvé pour l'URL {url}")
        return films

    cartes = conteneur.find_all('div', class_="card entity-card entity-card-list movie-card-theater cf hred")
    for film in cartes:
        meta = film.find("div", class_="meta")
        bloc_titre = meta.find('h2', class_="meta-title") if meta else None
        lien_titre = bloc_titre.find("a") if bloc_titre else None
        if lien_titre is None:
            # A card without a title link cannot be identified; skip it
            # instead of crashing the whole scrape.
            continue
        titre = lien_titre.get_text()

        section_realisateur = film.find("div", class_="meta-body-item meta-body-direction")
        nom_realisateur = (
            section_realisateur.find('span', class_="dark-grey-link")
            if section_realisateur else None
        )
        realisateur = nom_realisateur.get_text() if nom_realisateur else "Réalisateur non trouvé"

        horaires = _extraire_horaires(film)
        dataFilm_firebase = recupererDataFilm(titre, realisateur)

        if dataFilm_firebase == 0:
            img_url = _extraire_affiche(film, lien_titre)

            bloc_synopsis = film.find('div', class_="synopsis")
            texte_synopsis = bloc_synopsis.find('div', class_="content-txt") if bloc_synopsis else None
            synopsis = texte_synopsis.get_text() if texte_synopsis else "synopsis non trouvé"

            bloc_acteurs = film.find("div", class_="meta-body-item meta-body-actor")
            if bloc_acteurs:
                acteurs = [a.get_text() for a in bloc_acteurs.find_all("span", class_="dark-grey-link")]
            else:
                acteurs = ["acteurs non trouvés"]

            genres = _extraire_genres(film)
            heure, minute = _duree_tmdb(titre)

            film_data = {
                "titre": titre,
                "realisateur": realisateur,
                "casting": acteurs,
                "genres": genres,
                "duree": {"heure": heure, "minute": minute},
                "affiche": img_url,
                "synopsis": synopsis,
                "horaires": [
                    {
                        "cinema": cinema,
                        "seances": horaires
                    }
                ]
            }
            enregistrementFilm(film_data)
            print(f"{film_data['titre']} : enregistré dans la db")
        else:
            film_data = {
                "titre": dataFilm_firebase['titre'],
                "realisateur": dataFilm_firebase['realisateur'],
                # .get(): a list stored empty is presumably absent when read
                # back from the database - TODO confirm.
                "casting": dataFilm_firebase.get('casting', ["acteurs non trouvés"]),
                # Use the stored genres (the original hard-coded
                # ['Drame', 'Romance'] for every cached film).
                "genres": dataFilm_firebase.get('genres', ["Genre non trouvé"]),
                "duree": dataFilm_firebase['duree'],
                "affiche": dataFilm_firebase['affiche'],
                "synopsis": dataFilm_firebase['synopsis'],
                "horaires": [
                    {
                        "cinema": cinema,
                        "seances": horaires
                    }
                ]
            }
            print(f"{film_data['titre']} : récupéré dans la db")

        # A title already seen on this page only contributes its showtimes.
        existing_film = next((f for f in films if f["titre"] == titre), None)
        if existing_film:
            existing_film["horaires"].append({
                "cinema": cinema,
                "seances": horaires
            })
        else:
            films.append(film_data)
    return films
def get_data(cinemas):
    """Scrape every cinema in *cinemas* and return all films in one flat list.

    Args:
        cinemas: Iterable of mappings with a 'url' key (showtimes page) and a
            'salle' key (cinema name), both passed to ``scrap_infoFilm``.

    Returns:
        The concatenation, in order, of the lists returned for each cinema.
        The same film may appear once per cinema.
    """
    return [
        film
        for salle in cinemas
        for film in scrap_infoFilm(salle["url"], salle["salle"])
    ]
def cleanFilms(films):
    """Merge films that share a title into one entry listing all showtimes.

    The first occurrence of each title supplies the film's details; every
    later occurrence only contributes its 'horaires' entries, appended in
    order. The input list and its film dicts are left unmodified.

    Args:
        films: Iterable of film dicts, each with a 'titre' key and an
            'horaires' list of ``{"cinema": ..., "seances": [...]}`` dicts.

    Returns:
        A new list with one film dict per distinct title, in order of first
        appearance.
    """
    par_titre = {}
    for film in films:
        deja_vu = par_titre.get(film["titre"])
        if deja_vu is None:
            # Copy the dict and its 'horaires' list so that merging never
            # mutates the caller's data.
            par_titre[film["titre"]] = {**film, "horaires": list(film["horaires"])}
        else:
            # Merge every showtime entry of the duplicate, not only the first.
            deja_vu["horaires"].extend(
                {"cinema": horaire["cinema"], "seances": horaire["seances"]}
                for horaire in film["horaires"]
            )
    return list(par_titre.values())

40
modules/urlGenerator.py Normal file
View file

@ -0,0 +1,40 @@
import time
from datetime import datetime
import datetime
def decalageDate(baseURL, decalage):
    """Return *baseURL* followed by the date *decalage* days from today.

    The date is formatted as ``YYYY-MM-DD``. Real calendar arithmetic is used,
    so month and year boundaries are crossed correctly (the previous
    hand-written version kept the current month when the day rolled over
    outside December, producing a date in the past).

    Args:
        baseURL: URL prefix the date is appended to.
        decalage: Integer number of days to add to today's date.

    Returns:
        The resulting URL string. It is also printed to stdout, as before.
    """
    cible = datetime.datetime.today() + datetime.timedelta(days=decalage)
    url = baseURL + cible.date().isoformat()
    print(url)
    return url