Veri Analizi

UWburak · 30 May 2023

Ödevi bitirdiğim için burada paylaşmak istedim. Yolcu gemileri üzerinden hesaplamalar yaptım. DataFrame kısmında hâlâ hizmette olan gemileri alamadım; bu yüzden sadece servis dışı gemiler üzerinden devam ettim.

Bu çalışma, "vesselfinder.com" web sitesinden gemi verilerini çekerek analizler yapmayı amaçlamaktadır. Veri çekme ve analiz işlemleri aşağıdaki fonksiyonlar kullanılarak gerçekleştirilmiştir:

data() fonksiyonu: Bu fonksiyon, gemi verilerini çekeceğimiz URL'leri "sites.txt" adlı bir dosyaya kaydetmektedir. Bu URL'ler, farklı sayfalardaki gemi listelerini temsil etmektedir.
ships() fonksiyonu: Bu fonksiyon, "sites.txt" dosyasındaki URL'leri okuyarak gemi listesi sayfalarına erişmektedir. Bu sayfalarda listelenen her bir geminin ayrıntı sayfasına ait URL'yi "shiplinks.txt" adlı bir dosyaya kaydetmektedir.
shipsfeature() fonksiyonu: Bu fonksiyon, "shiplinks.txt" dosyasındaki gemi URL'lerini okuyarak her bir gemi için özellikleri çekmektedir. Özellikler ve değerleri, "shipsfeatures.txt" adlı bir dosyada saklanmaktadır.
dataFrame() fonksiyonu: Bu fonksiyon, "shipsfeatures.txt" dosyasındaki verileri okuyarak bir veri çerçevesi oluşturmaktadır. Veri çerçevesi üzerinde çeşitli hesaplamalar yapılarak gemi özelliklerinin normal ve log-normal dağılımları analiz edilmektedir. Oluşturulan grafikler, gemi özelliklerinin dağılımlarını göstermektedir. Bu fonksiyon, veri çerçevesini ve grafikleri ekrana bastırmaktadır.

Sonuç olarak, bu çalışma ile "vesselfinder.com" web sitesinden gemi verileri çekilerek gemi özelliklerinin normal ve log-normal dağılımları incelenmektedir. Elde edilen veri çerçevesi ve grafikler, gemi özelliklerinin dağılımını görselleştirmektedir.

Python:

from bs4 import BeautifulSoup
import requests
import time
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, lognorm

def data():
    """Write the paginated vessel-list URLs to ``sites.txt``.

    One URL per line is written for pages 1 through 47 of the
    vesselfinder.com listing, using the fixed query parameters embedded in
    the template below. An existing ``sites.txt`` is overwritten.
    """
    url_template = (
        "https://www.vesselfinder.com/vessels?page={page}"
        "&minLength=100&maxLength=150&type=3&sort=2&dir=2"
    )
    # range(1, 48) covers pages 1..47 inclusive.
    url_lines = [url_template.format(page=page) + "\n" for page in range(1, 48)]
    with open("sites.txt", "w") as out_file:
        out_file.writelines(url_lines)

def ships():
    """Collect the detail-page URL of every vessel listed in ``sites.txt``.

    Each non-empty line of ``sites.txt`` is fetched as a listing page. For
    every element with the CSS class ``ship-link`` the ``href`` attribute is
    resolved against the site root and written to ``shiplinks.txt`` as one
    absolute URL per line (an existing file is overwritten).

    Pages that fail (non-200 status, timeout or any other ``requests``
    error) are reported on stdout and skipped, so a single bad page does not
    abort the whole crawl.
    """
    # Function-scope import keeps this block self-contained without touching
    # the file-level import section.
    from urllib.parse import urljoin

    base_url = "https://www.vesselfinder.com/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    with open("sites.txt", "r") as site_file, open("shiplinks.txt", "w") as link_file:
        for line in site_file:
            page_url = line.strip()
            if not page_url:
                continue

            try:
                # Without a timeout a stalled connection would block forever.
                response = requests.get(page_url, headers=headers, timeout=30)
            except requests.RequestException as exc:
                print(f"Hata: {exc} - İstek başarısız.")
                continue

            if response.status_code != 200:
                print(f"Hata: {response.status_code} - İstek başarısız.")
                continue

            soup = BeautifulSoup(response.content, "html.parser")
            for anchor in soup.find_all(class_="ship-link"):
                href = anchor.get("href")
                if not href:
                    # An element without an href cannot yield a usable URL.
                    continue
                # urljoin handles both "/path" and "path" hrefs, avoiding the
                # double slash that plain string concatenation produces for
                # root-relative links.
                link_file.write(urljoin(base_url, href) + "\n")

def shipsfeature():
    """Scrape the feature table of every vessel listed in ``shiplinks.txt``.

    Each non-empty line of ``shiplinks.txt`` is fetched as a vessel detail
    page. Inside the ``div`` with class ``col npr vfix-top vfix-bottom`` every
    ``tr`` contributes one output line to ``shipsfeatures.txt``: the text of
    the ``n3`` cell left-justified to 50 characters, a space, then the text of
    the ``v3`` cell. A missing cell is written as an empty string. An existing
    output file is overwritten.

    Pages that fail (non-200 status, request error, or a page without the
    expected ``div``) are reported on stdout and skipped.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    with open("shiplinks.txt", "r") as link_file, open("shipsfeatures.txt", "w") as feature_file:
        for line in link_file:
            ship_url = line.strip()
            if not ship_url:
                continue

            try:
                # Without a timeout a stalled connection would block forever.
                response = requests.get(ship_url, headers=headers, timeout=30)
            except requests.RequestException as exc:
                print(f"Hata: {exc} - İstek başarısız.")
                continue

            if response.status_code != 200:
                print(f"Hata: {response.status_code} - İstek başarısız.")
                continue

            soup = BeautifulSoup(response.content, "html.parser")
            master_data = soup.find("div", class_="col npr vfix-top vfix-bottom")
            if master_data is None:
                # Page layout differs from what is expected; nothing to read.
                print(f"Hata: {ship_url} - Gemi verisi bulunamadı.")
                continue

            # Rows are written as they are read, per ship. Collecting them in
            # lists shared across ships would re-emit every earlier ship's
            # rows each time a new ship is processed.
            for row in master_data.find_all("tr"):
                name_cell = row.find(class_="n3")
                value_cell = row.find(class_="v3")
                name = name_cell.get_text(strip=True) if name_cell is not None else ""
                value = value_cell.get_text(strip=True) if value_cell is not None else ""
                feature_file.write(f"{name:<50} {value}\n")
      



def dataFrame():
    """Load ``shipsfeatures.txt`` and plot fitted densities of vessel traits.

    The file is parsed as two columns (feature name, value) separated by two
    or more whitespace characters. Rows are grouped into one record per
    vessel: a row whose feature is ``"Status"`` starts a new record. The
    numeric columns are converted with invalid entries (for example ``"-"``)
    turned into NaN.

    Two figures with a 2x2 grid each are shown. For "Year of Built",
    "Gross Tonnage", "Beam (m)" and "Length Overall (m)" the histogram is
    drawn twice: once with a fitted normal density and once with a fitted
    log-normal density.

    Raises:
        KeyError: if one of the expected feature columns is absent from the
            parsed data.
    """
    # header=None: the file consists solely of "name  value" lines, so the
    # first line is data and must not be consumed as a header row.
    df = pd.read_csv(
        "shipsfeatures.txt",
        sep=r"\s\s+",
        engine="python",
        header=None,
        names=["Features", "Values"],
    )
    # Rows without a feature name carry nothing that can be keyed on.
    df = df.dropna(subset=["Features"])

    records = []
    current_ship = {}
    for feature, value in zip(df["Features"], df["Values"]):
        # "Status" marks the first row of the next vessel.
        if feature == "Status" and current_ship:
            records.append(current_ship)
            current_ship = {}
        current_ship[feature] = value
    if current_ship:
        records.append(current_ship)

    new_df = pd.DataFrame(records)

    # errors="coerce" maps placeholders and unparsable text to NaN instead of
    # raising, which a plain astype(int)/astype(float) would do.
    numeric_columns = (
        "IMO number",
        "Gross Tonnage",
        "Summer Deadweight (t)",
        "Length Overall (m)",
        "Beam (m)",
        "Year of Built",
    )
    for column in numeric_columns:
        new_df[column] = pd.to_numeric(new_df[column], errors="coerce")
    new_df["Beam (m)"] = new_df["Beam (m)"].round(2)

    def normal_density(x, mean, std):
        """Return the normal pdf with the given mean and std evaluated at x."""
        return (1 / (std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mean) / std) ** 2)

    def lognormal_density(x, mean, std):
        """Return the log-normal pdf at x; mean and std describe ln(x)."""
        return (1 / (x * std * np.sqrt(2 * np.pi))) * np.exp(
            -0.5 * ((np.log(x) - mean) / std) ** 2
        )

    def plot_distribution_pair(series, title_label, axis_label, first_cell):
        """Draw the normal and log-normal panels in cells first_cell, +1."""
        values = series.dropna()
        if values.empty:
            return

        # Normal fit: parameters are the sample mean/std of the data itself.
        grid = np.linspace(values.min(), values.max(), 100)
        plt.subplot(2, 2, first_cell)
        plt.plot(grid, normal_density(grid, values.mean(), values.std()), label="Normal")
        plt.hist(values, density=True, bins=20, alpha=0.5)
        plt.xlabel(axis_label)
        plt.ylabel("Yoğunluk")
        plt.title(f"{title_label} - Normal Dağılım")

        # Log-normal fit: parameters are the mean/std of ln(data), and the
        # density only exists for strictly positive x.
        plt.subplot(2, 2, first_cell + 1)
        positive = values[values > 0]
        if not positive.empty:
            log_values = np.log(positive)
            positive_grid = np.linspace(positive.min(), positive.max(), 100)
            plt.plot(
                positive_grid,
                lognormal_density(positive_grid, log_values.mean(), log_values.std()),
                label="Log-Normal",
            )
        plt.hist(values, density=True, bins=20, alpha=0.5)
        plt.xlabel(axis_label)
        plt.ylabel("Yoğunluk")
        plt.title(f"{title_label} - Log-Normal Dağılım")

    # Figure 1: build year and gross tonnage.
    plt.figure(figsize=(10, 6))
    plot_distribution_pair(new_df["Year of Built"], "Built Year", "Built Year", 1)
    plot_distribution_pair(new_df["Gross Tonnage"], "Gross Tonnage", "Gross Tonnage", 3)
    plt.tight_layout()

    # Figure 2: beam and overall length.
    # NOTE(review): the "DWT Length" panels plot "Length Overall (m)"; the
    # label text is kept as found — confirm which wording is intended.
    plt.figure(figsize=(10, 6))
    plot_distribution_pair(new_df["Beam (m)"], "Breadth", "Breadth (m)", 1)
    plot_distribution_pair(new_df["Length Overall (m)"], "DWT Length", "DWT Length (m)", 3)
    plt.tight_layout()

    plt.show()


# Run the four pipeline stages in order; the user confirms with Enter before
# every stage after the first.
data()
for stage in (ships, shipsfeature, dataFrame):
    input("Buraya Tıklayıp Enter Basınız . ")
    stage()

avnii sezer · 11 Haz 2023

Kodlamayı, oyun yapmayı hep isterdim ama çok zor geliyor.

kollar40cm · 11 Haz 2023

eline sağlık reis

Ara

Veri Analizi

UWburak

avnii sezer

kollar40cm

Benzer Konular