Voltar ao blog
Scraping Post #3

Como Raspar dados do Discord

Por Leonam 28 de mar. 2026 1 min de leitura

Como raspar (para backup) dados do Discord utilizando Java + PostgreSQL - Mapa Mental

AVISO IMPORTANTE: Você deve estar autorizado a fazer isso; caso contrário, a prática configura CRIME. Faça somente em ambientes para os quais você possua autorização expressa. Aqui eu estou apenas disseminando o conhecimento.

Lista do que você precisa:

  • Spring Boot
  • JDA (Java Discord API)
  • Spring Data JPA
  • PostgreSQL
  • (Opcional) Muitas contas do Discord para rodar em paralelo.

Entidades de um Banco de Dados

Modelagem de Dados (Entidades)

1. ChannelEntity

import jakarta.persistence.*;
import lombok.*;
import java.time.OffsetDateTime;

/**
 * JPA entity mapped to the {@code discord_channels} table.
 *
 * <p>The table declares a unique index on {@code discord_channel_id} and a plain index on
 * {@code guild_id}. Lombok generates getters, setters, a builder (with {@code toBuilder()}),
 * and no-args / all-args constructors.
 *
 * <p>Field declaration order defines the parameter order of the Lombok all-args constructor,
 * so fields should not be reordered casually.
 *
 * <p>NOTE(review): {@code created_at} and {@code updated_at} are NOT NULL and nothing in this
 * class populates them; presumably the persisting code sets both — confirm.
 */
@Getter
@Setter
@Entity
@Builder(toBuilder = true)
@NoArgsConstructor
@AllArgsConstructor
@Table(
        name = "discord_channels",
        indexes = {
                @Index(name = "idx_discord_channels_discord_id", columnList = "discord_channel_id", unique = true),
                @Index(name = "idx_discord_channels_guild_id", columnList = "guild_id")
        }
)
public class ChannelEntity {
    /** Surrogate primary key, generated by the database identity column. */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** Required, unique channel identifier stored as text (max 32 chars). */
    @Column(name = "discord_channel_id", nullable = false, unique = true, length = 32)
    private String discordChannelId;

    /** Lazily loaded guild association; the {@code guild_id} join column is nullable. */
    @ManyToOne(fetch = FetchType.LAZY)
    @JoinColumn(name = "guild_id")
    private GuildEntity guild;

    /** Required channel name (max 255 chars). */
    @Column(name = "name", nullable = false, length = 255)
    private String name;

    /** Required channel type stored as free text (max 64 chars), not as an enum. */
    @Column(name = "channel_type", nullable = false, length = 64)
    private String channelType;

    /**
     * Optional parent channel identifier, kept as plain text rather than as an association.
     * NOTE(review): presumably the category or parent channel of a thread — confirm.
     */
    @Column(name = "parent_discord_channel_id", length = 32)
    private String parentDiscordChannelId;

    /** Required creation timestamp of this row. */
    @Column(name = "created_at", nullable = false)
    private OffsetDateTime createdAt;

    /** Required last-update timestamp of this row. */
    @Column(name = "updated_at", nullable = false)
    private OffsetDateTime updatedAt;
}

2. DiscordMessageBackfillStateEntity

package top.lmix.lmixbot.feature.discord.entity;

import jakarta.persistence.*;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
import lombok.*;
import top.lmix.lmixbot.feature.discord.backfill.DiscordMessageBackfillStatus;
import java.time.OffsetDateTime;

/**
 * JPA entity mapped to the {@code discord_message_backfill_states} table.
 *
 * <p>{@code discord_channel_id} is unique, so there is at most one state row per channel.
 * Guild, channel and parent identifiers are stored as plain text columns, not as
 * associations to the other entities.
 *
 * <p>Indexes: unique on {@code discord_channel_id}; composite on
 * {@code (status, updated_at, id)}; plain on {@code discord_guild_id}.
 * NOTE(review): the composite index presumably backs a "next state to process" query ordered
 * by {@code updated_at} — confirm against the repository.
 *
 * <p>Field declaration order defines the parameter order of the Lombok all-args constructor.
 */
@Getter
@Setter
@Entity
@Builder(toBuilder = true)
@NoArgsConstructor
@AllArgsConstructor
@Table(
        name = "discord_message_backfill_states",
        indexes = {
                @Index(name = "idx_discord_message_backfill_channel_id", columnList = "discord_channel_id", unique = true),
                @Index(name = "idx_discord_message_backfill_status", columnList = "status, updated_at, id"),
                @Index(name = "idx_discord_message_backfill_guild_id", columnList = "discord_guild_id")
        }
)
public class DiscordMessageBackfillStateEntity {
    /** Surrogate primary key, generated by the database identity column. */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** Required guild identifier stored as text (max 32 chars). */
    @Column(name = "discord_guild_id", nullable = false, length = 32)
    private String discordGuildId;

    /** Required, unique channel identifier stored as text (max 32 chars). */
    @Column(name = "discord_channel_id", nullable = false, unique = true, length = 32)
    private String discordChannelId;

    /** Optional parent channel identifier stored as text. */
    @Column(name = "parent_discord_channel_id", length = 32)
    private String parentDiscordChannelId;

    /** Required channel name copied into this row (max 255 chars). */
    @Column(name = "channel_name", nullable = false, length = 255)
    private String channelName;

    /** Required channel type stored as free text (max 64 chars). */
    @Column(name = "channel_type", nullable = false, length = 64)
    private String channelType;

    /** Required status, persisted by enum constant name in a VARCHAR(32) column. */
    @JdbcTypeCode(SqlTypes.VARCHAR)
    @Enumerated(EnumType.STRING)
    @Column(name = "status", nullable = false, length = 32)
    private DiscordMessageBackfillStatus status;

    /**
     * Optional message identifier.
     * NOTE(review): presumably the paging cursor for fetching older messages — confirm against
     * the backfill service.
     */
    @Column(name = "next_before_message_id", length = 32)
    private String nextBeforeMessageId;

    /**
     * Optional message identifier.
     * NOTE(review): presumably the newest message already processed, used to fetch newer ones —
     * confirm against the backfill service.
     */
    @Column(name = "last_seen_message_id", length = 32)
    private String lastSeenMessageId;

    // The three counters below are primitives, so they are 0 unless explicitly set.

    /** Required running total of fetched messages. */
    @Column(name = "total_fetched_messages", nullable = false)
    private long totalFetchedMessages;

    /** Required running total of requests issued. */
    @Column(name = "request_count", nullable = false)
    private long requestCount;

    /** Required failure counter. */
    @Column(name = "failure_count", nullable = false)
    private int failureCount;

    /** Optional text of the last error, stored in an unbounded TEXT column. */
    @Column(name = "last_error", columnDefinition = "TEXT")
    private String lastError;

    /** Optional timestamp of the last attempt. */
    @Column(name = "last_attempt_at")
    private OffsetDateTime lastAttemptAt;

    /** Optional timestamp of the last successful attempt. */
    @Column(name = "last_success_at")
    private OffsetDateTime lastSuccessAt;

    /** Optional completion timestamp. */
    @Column(name = "completed_at")
    private OffsetDateTime completedAt;

    /** Required creation timestamp of this row; not populated by this class. */
    @Column(name = "created_at", nullable = false)
    private OffsetDateTime createdAt;

    /** Required last-update timestamp of this row; not populated by this class. */
    @Column(name = "updated_at", nullable = false)
    private OffsetDateTime updatedAt;
}

3. GuildEntity

package top.lmix.lmixbot.feature.discord.entity;

import jakarta.persistence.*;
import lombok.*;
import java.time.OffsetDateTime;

/**
 * JPA entity mapped to the {@code discord_guilds} table.
 *
 * <p>The table declares a unique index on {@code discord_guild_id}. Lombok generates getters,
 * setters, a builder (with {@code toBuilder()}), and no-args / all-args constructors; field
 * declaration order defines the all-args constructor parameter order.
 *
 * <p>NOTE(review): {@code created_at} and {@code updated_at} are NOT NULL and nothing in this
 * class populates them; presumably the persisting code sets both — confirm.
 */
@Getter
@Setter
@Entity
@Builder(toBuilder = true)
@NoArgsConstructor
@AllArgsConstructor
@Table(
        name = "discord_guilds",
        indexes = {
                @Index(name = "idx_discord_guilds_discord_id", columnList = "discord_guild_id", unique = true)
        }
)
public class GuildEntity {
    /** Surrogate primary key, generated by the database identity column. */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** Required, unique guild identifier stored as text (max 32 chars). */
    @Column(name = "discord_guild_id", nullable = false, unique = true, length = 32)
    private String discordGuildId;

    /** Required guild name (max 255 chars). */
    @Column(name = "name", nullable = false, length = 255)
    private String name;

    /** Optional icon URL (max 1024 chars). */
    @Column(name = "icon_url", length = 1024)
    private String iconUrl;

    /** Required creation timestamp of this row. */
    @Column(name = "created_at", nullable = false)
    private OffsetDateTime createdAt;

    /** Required last-update timestamp of this row. */
    @Column(name = "updated_at", nullable = false)
    private OffsetDateTime updatedAt;
}

4. GuildMemberEntity

package top.lmix.lmixbot.feature.discord.entity;

import jakarta.persistence.*;
import lombok.*;
import java.time.OffsetDateTime;

/**
 * JPA entity mapped to the {@code discord_guild_members} table, linking a user to a guild.
 *
 * <p>The unique constraint {@code uk_guild_member_user_guild} on {@code (user_id, guild_id)}
 * allows at most one membership row per user per guild. Both foreign keys are also indexed
 * individually.
 *
 * <p>Field declaration order defines the parameter order of the Lombok all-args constructor.
 *
 * <p>NOTE(review): {@code created_at} and {@code updated_at} are NOT NULL and nothing in this
 * class populates them; presumably the persisting code sets both — confirm.
 */
@Getter
@Setter
@Entity
@Builder(toBuilder = true)
@NoArgsConstructor
@AllArgsConstructor
@Table(
        name = "discord_guild_members",
        uniqueConstraints = {
                @UniqueConstraint(name = "uk_guild_member_user_guild", columnNames = {"user_id", "guild_id"})
        },
        indexes = {
                @Index(name = "idx_guild_member_user_id", columnList = "user_id"),
                @Index(name = "idx_guild_member_guild_id", columnList = "guild_id")
        }
)
public class GuildMemberEntity {
    /** Surrogate primary key, generated by the database identity column. */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** Required, lazily loaded user side of the membership. */
    @ManyToOne(fetch = FetchType.LAZY, optional = false)
    @JoinColumn(name = "user_id", nullable = false)
    private UserEntity user;

    /** Required, lazily loaded guild side of the membership. */
    @ManyToOne(fetch = FetchType.LAZY, optional = false)
    @JoinColumn(name = "guild_id", nullable = false)
    private GuildEntity guild;

    /** Optional nickname (max 255 chars). */
    @Column(name = "nickname", length = 255)
    private String nickname;

    /**
     * Optional effective name (max 255 chars).
     * NOTE(review): presumably the nickname when set, otherwise the user's name — confirm.
     */
    @Column(name = "effective_name", length = 255)
    private String effectiveName;

    /** Optional avatar URL (max 1024 chars). */
    @Column(name = "avatar_url", length = 1024)
    private String avatarUrl;

    /** Required flag; primitive, so {@code false} unless explicitly set. */
    @Column(name = "pending", nullable = false)
    private boolean pending;

    /** Optional timestamp stored in {@code joined_at_discord}. */
    @Column(name = "joined_at_discord")
    private OffsetDateTime joinedAtDiscord;

    /** Optional timestamp stored in {@code boosted_at_discord}. */
    @Column(name = "boosted_at_discord")
    private OffsetDateTime boostedAtDiscord;

    /** Required creation timestamp of this row. */
    @Column(name = "created_at", nullable = false)
    private OffsetDateTime createdAt;

    /** Required last-update timestamp of this row. */
    @Column(name = "updated_at", nullable = false)
    private OffsetDateTime updatedAt;
}

5. MessageEntity

package top.lmix.lmixbot.feature.discord.entity;

import jakarta.persistence.*;
import lombok.*;
import java.time.OffsetDateTime;

/**
 * JPA entity mapped to the {@code discord_messages} table.
 *
 * <p>Indexes: unique on {@code discord_message_id}; single-column on {@code channel_id},
 * {@code author_id}, {@code guild_id} and {@code created_at_discord}; composite on
 * {@code (guild_id, author_id, created_at_discord, id)}.
 *
 * <p>Author and channel are mandatory associations; guild and guild member are optional.
 * All associations are lazily loaded. Counts and flags are primitives, so they default to
 * {@code 0} / {@code false} unless explicitly set.
 *
 * <p>Field declaration order defines the parameter order of the Lombok all-args constructor.
 *
 * <p>NOTE(review): {@code persisted_at} and {@code updated_at} are NOT NULL and nothing in this
 * class populates them; presumably the persisting code sets both — confirm.
 */
@Getter
@Setter
@Entity
@Builder(toBuilder = true)
@NoArgsConstructor
@AllArgsConstructor
@Table(
        name = "discord_messages",
        indexes = {
                @Index(name = "idx_discord_messages_discord_id", columnList = "discord_message_id", unique = true),
                @Index(name = "idx_discord_messages_channel_id", columnList = "channel_id"),
                @Index(name = "idx_discord_messages_author_id", columnList = "author_id"),
                @Index(name = "idx_discord_messages_guild_id", columnList = "guild_id"),
                @Index(name = "idx_discord_messages_created_at_discord", columnList = "created_at_discord"),
                @Index(name = "idx_discord_messages_guild_author_created", columnList = "guild_id, author_id, created_at_discord, id")
        }
)
public class MessageEntity {
    /** Surrogate primary key, generated by the database identity column. */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** Required, unique message identifier stored as text (max 32 chars). */
    @Column(name = "discord_message_id", nullable = false, unique = true, length = 32)
    private String discordMessageId;

    // --- Associations (all lazily loaded) ---

    /** Required author of the message. */
    @ManyToOne(fetch = FetchType.LAZY, optional = false)
    @JoinColumn(name = "author_id", nullable = false)
    private UserEntity author;

    /** Optional guild; the {@code guild_id} join column is nullable. */
    @ManyToOne(fetch = FetchType.LAZY)
    @JoinColumn(name = "guild_id")
    private GuildEntity guild;

    /** Optional guild membership row; the {@code guild_member_id} join column is nullable. */
    @ManyToOne(fetch = FetchType.LAZY)
    @JoinColumn(name = "guild_member_id")
    private GuildMemberEntity guildMember;

    /** Required channel the message belongs to. */
    @ManyToOne(fetch = FetchType.LAZY, optional = false)
    @JoinColumn(name = "channel_id", nullable = false)
    private ChannelEntity channel;

    /** Optional identifier of a referenced message, kept as text rather than as an association. */
    @Column(name = "referenced_discord_message_id", length = 32)
    private String referencedDiscordMessageId;

    /** Required message type stored as free text (max 64 chars). */
    @Column(name = "message_type", nullable = false, length = 64)
    private String messageType;

    // --- Content variants, each in an unbounded TEXT column ---
    // NOTE(review): presumably mirror JDA's raw / display / stripped content getters — confirm.

    @Column(name = "content_raw", columnDefinition = "TEXT")
    private String contentRaw;

    @Column(name = "content_display", columnDefinition = "TEXT")
    private String contentDisplay;

    @Column(name = "content_stripped", columnDefinition = "TEXT")
    private String contentStripped;

    // --- Optional metadata ---

    /** Optional jump URL (max 1024 chars). */
    @Column(name = "jump_url", length = 1024)
    private String jumpUrl;

    /** Optional nonce (max 64 chars). */
    @Column(name = "nonce", length = 64)
    private String nonce;

    /** Optional application identifier stored as text (max 32 chars). */
    @Column(name = "application_id", length = 32)
    private String applicationId;

    /**
     * Optional flags stored as text (max 512 chars).
     * NOTE(review): the serialization format is not visible here — confirm with the writer.
     */
    @Column(name = "message_flags", length = 512)
    private String messageFlags;

    // --- Required counters (primitive int, 0 unless set) ---

    @Column(name = "attachment_count", nullable = false)
    private int attachmentCount;

    @Column(name = "embed_count", nullable = false)
    private int embedCount;

    @Column(name = "reaction_count", nullable = false)
    private int reactionCount;

    @Column(name = "sticker_count", nullable = false)
    private int stickerCount;

    @Column(name = "component_count", nullable = false)
    private int componentCount;

    @Column(name = "mentioned_user_count", nullable = false)
    private int mentionedUserCount;

    @Column(name = "mentioned_channel_count", nullable = false)
    private int mentionedChannelCount;

    @Column(name = "mentioned_role_count", nullable = false)
    private int mentionedRoleCount;

    @Column(name = "mentioned_member_count", nullable = false)
    private int mentionedMemberCount;

    // --- Required flags (primitive boolean, false unless set) ---

    @Column(name = "tts_message", nullable = false)
    private boolean ttsMessage;

    @Column(name = "pinned_message", nullable = false)
    private boolean pinnedMessage;

    @Column(name = "edited_message", nullable = false)
    private boolean editedMessage;

    @Column(name = "webhook_message", nullable = false)
    private boolean webhookMessage;

    @Column(name = "from_guild", nullable = false)
    private boolean fromGuild;

    @Column(name = "suppressed_embeds", nullable = false)
    private boolean suppressedEmbeds;

    @Column(name = "ephemeral_message", nullable = false)
    private boolean ephemeralMessage;

    // --- Timestamps ---

    /** Required timestamp stored in {@code created_at_discord}; indexed on its own and in the composite index. */
    @Column(name = "created_at_discord", nullable = false)
    private OffsetDateTime createdAtDiscord;

    /** Optional timestamp stored in {@code edited_at_discord}. */
    @Column(name = "edited_at_discord")
    private OffsetDateTime editedAtDiscord;

    /** Required timestamp stored in {@code persisted_at}. */
    @Column(name = "persisted_at", nullable = false)
    private OffsetDateTime persistedAt;

    /** Required last-update timestamp of this row. */
    @Column(name = "updated_at", nullable = false)
    private OffsetDateTime updatedAt;
}

6. UserEntity

package top.lmix.lmixbot.feature.discord.entity;

import jakarta.persistence.*;
import lombok.*;
import java.time.OffsetDateTime;

/**
 * JPA entity mapped to the {@code discord_users} table.
 *
 * <p>The table declares a unique index on {@code discord_user_id}. Lombok generates getters,
 * setters, a builder (with {@code toBuilder()}), and no-args / all-args constructors; field
 * declaration order defines the all-args constructor parameter order.
 *
 * <p>NOTE(review): {@code created_at} and {@code updated_at} are NOT NULL and nothing in this
 * class populates them; presumably the persisting code sets both — confirm.
 */
@Getter
@Setter
@Entity
@Builder(toBuilder = true)
@NoArgsConstructor
@AllArgsConstructor
@Table(
        name = "discord_users",
        indexes = {
                @Index(name = "idx_discord_users_discord_id", columnList = "discord_user_id", unique = true)
        }
)
public class UserEntity {
    /** Surrogate primary key, generated by the database identity column. */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** Required, unique user identifier stored as text (max 32 chars). */
    @Column(name = "discord_user_id", nullable = false, unique = true, length = 32)
    private String discordUserId;

    /** Required username (max 255 chars). */
    @Column(name = "username", nullable = false, length = 255)
    private String username;

    /** Optional global name (max 255 chars). */
    @Column(name = "global_name", length = 255)
    private String globalName;

    /** Optional user tag (max 255 chars). */
    @Column(name = "user_tag", length = 255)
    private String userTag;

    /** Optional discriminator (max 16 chars). */
    @Column(name = "discriminator", length = 16)
    private String discriminator;

    /** Optional avatar URL (max 1024 chars). */
    @Column(name = "avatar_url", length = 1024)
    private String avatarUrl;

    /** Required flag; primitive, so {@code false} unless explicitly set. */
    @Column(name = "bot_user", nullable = false)
    private boolean botUser;

    /**
     * Required flag; primitive, so {@code false} unless explicitly set.
     * Note that the column is named {@code discord_system_user} while the field is
     * {@code systemUser}.
     */
    @Column(name = "discord_system_user", nullable = false)
    private boolean systemUser;

    /** Required creation timestamp of this row. */
    @Column(name = "created_at", nullable = false)
    private OffsetDateTime createdAt;

    /** Required last-update timestamp of this row. */
    @Column(name = "updated_at", nullable = false)
    private OffsetDateTime updatedAt;
}

Implementação do Serviço de Backfill

Lógica Principal

O serviço de backfill coleta as mensagens em duas direções a partir de um ponto de referência: retroativamente (as mensagens anteriores a esse ponto) e para frente (as novas mensagens publicadas depois dele).

package top.lmix.lmixbot.feature.discord.backfill;

import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import net.dv8tion.jda.api.JDA;
import net.dv8tion.jda.api.Permission;
import net.dv8tion.jda.api.entities.*;
import net.dv8tion.jda.api.entities.channel.attribute.*;
import net.dv8tion.jda.api.entities.channel.concrete.ThreadChannel;
import net.dv8tion.jda.api.entities.channel.middleman.*;
import net.dv8tion.jda.api.requests.GatewayIntent;
import org.springframework.stereotype.Service;
import top.lmix.lmixbot.feature.discord.entity.DiscordMessageBackfillStateEntity;
import top.lmix.lmixbot.feature.discord.repository.DiscordMessageBackfillStateRepository;
import top.lmix.lmixbot.feature.discord.service.DiscordMessagePersistenceService;
import java.time.OffsetDateTime;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * Spring service placeholder for the Discord message backfill.
 *
 * <p>The body is intentionally omitted in this listing; only the class declaration and its
 * annotations are shown.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class DiscordMessageBackfillService {
    // ... Full implementation of the fetch-and-persist loop ...
    // (Omitted for brevity in this mind map, but it must include the fetchMessagesBefore and fetchMessagesAfter logic)
}

DiscordMessageBackfillStatus (Enum)

package top.lmix.lmixbot.feature.discord.backfill;

/**
 * Status values recorded for a channel's message backfill.
 *
 * <p>{@code DiscordMessageBackfillStateEntity} persists this enum by constant name
 * ({@code EnumType.STRING}), so renaming a constant changes the stored value. Declaration
 * order is kept stable for any code that relies on {@code ordinal()} or {@code values()}.
 *
 * <p>NOTE(review): the per-constant descriptions below are inferred from the names; confirm
 * them against the backfill service.
 */
public enum DiscordMessageBackfillStatus {

    /** Presumably: registered but not started yet. */
    PENDING,

    /** Presumably: currently being processed. */
    RUNNING,

    /** Presumably: last attempt failed and the channel is eligible for a retry. */
    FAILED_RETRY,

    /** Presumably: fully processed. */
    COMPLETED,

    /** Presumably: skipped because the channel cannot be read. */
    SKIPPED_NO_ACCESS;
}

Dicas de Performance e Rate Limit

  1. Rate Limits: A API do Discord permite solicitar até 100 mensagens por requisição. Aguarde cerca de 1,3 segundo entre as requisições para evitar o erro HTTP 429 (Too Many Requests).
  2. Paralelismo: Se for realizar o backup em vários servidores, utilize instâncias com tokens diferentes para cada uma, otimizando o tempo total de coleta.
  3. Persistência: O PostgreSQL é ideal para este cenário devido à sua performance superior em workloads complexos e concorrência (MVCC).

Nos meus testes, consegui coletar por volta de 165 MB de dados em um servidor de amigos no qual eu estava autorizado a testar a lameragem.

Quantidade de dados varridos