Scraping
Post #3
Como Raspar dados do Discord
Por Leonam
28 de mar. 2026
1 min de leitura
Como raspar (para backup) dados do Discord utilizando Java + PostgreSQL - Mapa Mental
AVISO IMPORTANTE: Você deve estar autorizado a fazer isso; caso contrário, configura-se CRIME. Faça isso somente em ambientes para os quais você possua autorização expressa. Aqui estou apenas disseminando conhecimento.
Lista do que você precisa:
- Spring Boot
- JDA (Java Discord API)
- Spring Data JPA
- PostgreSQL
- (Opcional) Muitas contas do Discord para rodar em paralelo.

Modelagem de Dados (Entidades)
1. ChannelEntity
package top.lmix.lmixbot.feature.discord.entity;

import jakarta.persistence.*;
import lombok.*;
import java.time.OffsetDateTime;

/**
 * JPA entity mapped to the {@code discord_channels} table, one row per Discord channel id.
 *
 * <p>Lombok generates the accessors, a builder (with {@code toBuilder()}), and the
 * no-args/all-args constructors. The all-args constructor follows field declaration
 * order, so reordering fields changes its signature.
 *
 * <p>Nothing in this class populates {@code createdAt}/{@code updatedAt}. Both columns
 * are NOT NULL, so the caller must set them before the entity is persisted.
 */
@Getter
@Setter
@Entity
@Builder(toBuilder = true)
@NoArgsConstructor
@AllArgsConstructor
@Table(
    name = "discord_channels",
    indexes = {
        // Uniqueness of discord_channel_id is enforced by this named index alone.
        // The column is deliberately not flagged unique: Column.unique would add a
        // second, auto-named unique constraint on top of this index.
        @Index(name = "idx_discord_channels_discord_id", columnList = "discord_channel_id", unique = true),
        @Index(name = "idx_discord_channels_guild_id", columnList = "guild_id")
    }
)
public class ChannelEntity {

    /** Surrogate primary key, generated by the database (IDENTITY). */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** Channel identifier on the Discord side, stored as text (max 32 chars). Required. */
    @Column(name = "discord_channel_id", nullable = false, length = 32)
    private String discordChannelId;

    /** Guild the channel belongs to; lazily loaded. The join column is nullable. */
    @ManyToOne(fetch = FetchType.LAZY)
    @JoinColumn(name = "guild_id")
    private GuildEntity guild;

    /** Channel name. Required. */
    @Column(name = "name", nullable = false, length = 255)
    private String name;

    /** Channel type stored as free text (max 64 chars). Required. */
    @Column(name = "channel_type", nullable = false, length = 64)
    private String channelType;

    /** Discord id of the parent channel, if any. Stored as plain text, not as a foreign key. */
    @Column(name = "parent_discord_channel_id", length = 32)
    private String parentDiscordChannelId;

    /** Row creation timestamp. Required; must be set by the caller. */
    @Column(name = "created_at", nullable = false)
    private OffsetDateTime createdAt;

    /** Last row update timestamp. Required; must be set by the caller. */
    @Column(name = "updated_at", nullable = false)
    private OffsetDateTime updatedAt;
}
2. DiscordMessageBackfillStateEntity
package top.lmix.lmixbot.feature.discord.entity;
import jakarta.persistence.*;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
import lombok.*;
import top.lmix.lmixbot.feature.discord.backfill.DiscordMessageBackfillStatus;
import java.time.OffsetDateTime;
/**
 * JPA entity mapped to {@code discord_message_backfill_states}: the persisted backfill
 * progress record for a channel, at most one row per {@code discord_channel_id}.
 *
 * <p>Guild, channel and parent channel are referenced by their Discord ids as plain
 * strings; this entity declares no foreign-key associations.
 *
 * <p>Lombok generates the accessors, a builder, and the no-args/all-args constructors.
 * The all-args constructor follows field declaration order, so reordering fields changes
 * its signature. Nothing in this class populates {@code createdAt}/{@code updatedAt};
 * both are NOT NULL and must be set by the caller.
 */
@Getter
@Setter
@Entity
@Builder(toBuilder = true)
@NoArgsConstructor
@AllArgsConstructor
@Table(
    name = "discord_message_backfill_states",
    indexes = {
        // Uniqueness of discord_channel_id is enforced by this named index alone.
        // The column is deliberately not flagged unique: Column.unique would add a
        // second, auto-named unique constraint on top of this index.
        @Index(name = "idx_discord_message_backfill_channel_id", columnList = "discord_channel_id", unique = true),
        @Index(name = "idx_discord_message_backfill_status", columnList = "status, updated_at, id"),
        @Index(name = "idx_discord_message_backfill_guild_id", columnList = "discord_guild_id")
    }
)
public class DiscordMessageBackfillStateEntity {

    /** Surrogate primary key, generated by the database (IDENTITY). */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** Discord id of the guild, stored as text. Required. */
    @Column(name = "discord_guild_id", nullable = false, length = 32)
    private String discordGuildId;

    /** Discord id of the channel this state row tracks, stored as text. Required. */
    @Column(name = "discord_channel_id", nullable = false, length = 32)
    private String discordChannelId;

    /** Discord id of the parent channel, if any. */
    @Column(name = "parent_discord_channel_id", length = 32)
    private String parentDiscordChannelId;

    /** Channel name copied onto this row. Required. */
    @Column(name = "channel_name", nullable = false, length = 255)
    private String channelName;

    /** Channel type stored as free text (max 64 chars). Required. */
    @Column(name = "channel_type", nullable = false, length = 64)
    private String channelType;

    /**
     * Current backfill status. Persisted by constant name in a VARCHAR column, so
     * renaming an enum constant invalidates rows that already store the old name.
     */
    @JdbcTypeCode(SqlTypes.VARCHAR)
    @Enumerated(EnumType.STRING)
    @Column(name = "status", nullable = false, length = 32)
    private DiscordMessageBackfillStatus status;

    // NOTE(review): the two ids below look like the paging cursors of the backfill loop
    // (older-than / newest-seen). The service body is not part of this listing; confirm.
    @Column(name = "next_before_message_id", length = 32)
    private String nextBeforeMessageId;

    @Column(name = "last_seen_message_id", length = 32)
    private String lastSeenMessageId;

    /** Counter column for fetched messages; defaults to 0 when built without a value. */
    @Column(name = "total_fetched_messages", nullable = false)
    private long totalFetchedMessages;

    /** Counter column for requests; defaults to 0 when built without a value. */
    @Column(name = "request_count", nullable = false)
    private long requestCount;

    /** Counter column for failures; defaults to 0 when built without a value. */
    @Column(name = "failure_count", nullable = false)
    private int failureCount;

    /** Text of the last error, if any (unbounded TEXT column). */
    @Column(name = "last_error", columnDefinition = "TEXT")
    private String lastError;

    /** Timestamp of the last attempt; nullable. */
    @Column(name = "last_attempt_at")
    private OffsetDateTime lastAttemptAt;

    /** Timestamp of the last success; nullable. */
    @Column(name = "last_success_at")
    private OffsetDateTime lastSuccessAt;

    /** Completion timestamp; nullable. */
    @Column(name = "completed_at")
    private OffsetDateTime completedAt;

    /** Row creation timestamp. Required; must be set by the caller. */
    @Column(name = "created_at", nullable = false)
    private OffsetDateTime createdAt;

    /** Last row update timestamp. Required; must be set by the caller. */
    @Column(name = "updated_at", nullable = false)
    private OffsetDateTime updatedAt;
}
3. GuildEntity
package top.lmix.lmixbot.feature.discord.entity;
import jakarta.persistence.*;
import lombok.*;
import java.time.OffsetDateTime;
/**
 * JPA entity mapped to the {@code discord_guilds} table, one row per Discord guild id.
 *
 * <p>Lombok generates the accessors, a builder, and the no-args/all-args constructors.
 * The all-args constructor follows field declaration order, so reordering fields changes
 * its signature. Nothing in this class populates {@code createdAt}/{@code updatedAt};
 * both are NOT NULL and must be set by the caller.
 */
@Getter
@Setter
@Entity
@Builder(toBuilder = true)
@NoArgsConstructor
@AllArgsConstructor
@Table(
    name = "discord_guilds",
    indexes = {
        // Uniqueness of discord_guild_id is enforced by this named index alone.
        // The column is deliberately not flagged unique: Column.unique would add a
        // second, auto-named unique constraint on top of this index.
        @Index(name = "idx_discord_guilds_discord_id", columnList = "discord_guild_id", unique = true)
    }
)
public class GuildEntity {

    /** Surrogate primary key, generated by the database (IDENTITY). */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** Guild identifier on the Discord side, stored as text (max 32 chars). Required. */
    @Column(name = "discord_guild_id", nullable = false, length = 32)
    private String discordGuildId;

    /** Guild name. Required. */
    @Column(name = "name", nullable = false, length = 255)
    private String name;

    /** URL of the guild icon; nullable. */
    @Column(name = "icon_url", length = 1024)
    private String iconUrl;

    /** Row creation timestamp. Required; must be set by the caller. */
    @Column(name = "created_at", nullable = false)
    private OffsetDateTime createdAt;

    /** Last row update timestamp. Required; must be set by the caller. */
    @Column(name = "updated_at", nullable = false)
    private OffsetDateTime updatedAt;
}
4. GuildMemberEntity
package top.lmix.lmixbot.feature.discord.entity;
import jakarta.persistence.*;
import lombok.*;
import java.time.OffsetDateTime;
/**
 * JPA entity mapped to the {@code discord_guild_members} table: the membership of one
 * user in one guild. The (user_id, guild_id) pair is unique.
 *
 * <p>Lombok generates the accessors, a builder, and the no-args/all-args constructors.
 * The all-args constructor follows field declaration order, so reordering fields changes
 * its signature. Nothing in this class populates {@code createdAt}/{@code updatedAt};
 * both are NOT NULL and must be set by the caller.
 */
@Getter
@Setter
@Entity
@Builder(toBuilder = true)
@NoArgsConstructor
@AllArgsConstructor
@Table(
name = "discord_guild_members",
uniqueConstraints = {
@UniqueConstraint(name = "uk_guild_member_user_guild", columnNames = {"user_id", "guild_id"})
},
indexes = {
// NOTE(review): the unique constraint above already leads with user_id, so this
// single-column index may be redundant on the target database -- confirm before removing.
@Index(name = "idx_guild_member_user_id", columnList = "user_id"),
@Index(name = "idx_guild_member_guild_id", columnList = "guild_id")
}
)
public class GuildMemberEntity {
/** Surrogate primary key, generated by the database (IDENTITY). */
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
private Long id;
/** The member's user row; required, lazily loaded. */
@ManyToOne(fetch = FetchType.LAZY, optional = false)
@JoinColumn(name = "user_id", nullable = false)
private UserEntity user;
/** The guild the membership belongs to; required, lazily loaded. */
@ManyToOne(fetch = FetchType.LAZY, optional = false)
@JoinColumn(name = "guild_id", nullable = false)
private GuildEntity guild;
/** Nickname column; nullable. */
@Column(name = "nickname", length = 255)
private String nickname;
/** Effective display name column; nullable. */
@Column(name = "effective_name", length = 255)
private String effectiveName;
/** Avatar URL column; nullable. */
@Column(name = "avatar_url", length = 1024)
private String avatarUrl;
/** Pending flag; primitive, so it defaults to false when built without a value. */
@Column(name = "pending", nullable = false)
private boolean pending;
/** Join timestamp as reported by Discord; nullable. */
@Column(name = "joined_at_discord")
private OffsetDateTime joinedAtDiscord;
/** Boost timestamp as reported by Discord; nullable. */
@Column(name = "boosted_at_discord")
private OffsetDateTime boostedAtDiscord;
/** Row creation timestamp. Required; must be set by the caller. */
@Column(name = "created_at", nullable = false)
private OffsetDateTime createdAt;
/** Last row update timestamp. Required; must be set by the caller. */
@Column(name = "updated_at", nullable = false)
private OffsetDateTime updatedAt;
}5. MessageEntity
package top.lmix.lmixbot.feature.discord.entity;
import jakarta.persistence.*;
import lombok.*;
import java.time.OffsetDateTime;
/**
 * JPA entity mapped to the {@code discord_messages} table, one row per Discord message id.
 *
 * <p>Author and channel are required associations; guild and guild member are optional.
 * All associations are lazily loaded. Count and flag columns are primitives, so they
 * default to {@code 0}/{@code false} when the builder is used without setting them.
 *
 * <p>Lombok generates the accessors, a builder, and the no-args/all-args constructors.
 * The all-args constructor follows field declaration order, so reordering fields changes
 * its signature. Nothing in this class populates {@code persistedAt}/{@code updatedAt};
 * both are NOT NULL and must be set by the caller.
 */
@Getter
@Setter
@Entity
@Builder(toBuilder = true)
@NoArgsConstructor
@AllArgsConstructor
@Table(
    name = "discord_messages",
    indexes = {
        // Uniqueness of discord_message_id is enforced by this named index alone.
        // The column is deliberately not flagged unique: Column.unique would add a
        // second, auto-named unique constraint on top of this index, and every extra
        // index costs one more write per inserted message.
        @Index(name = "idx_discord_messages_discord_id", columnList = "discord_message_id", unique = true),
        @Index(name = "idx_discord_messages_channel_id", columnList = "channel_id"),
        @Index(name = "idx_discord_messages_author_id", columnList = "author_id"),
        @Index(name = "idx_discord_messages_guild_id", columnList = "guild_id"),
        @Index(name = "idx_discord_messages_created_at_discord", columnList = "created_at_discord"),
        @Index(name = "idx_discord_messages_guild_author_created", columnList = "guild_id, author_id, created_at_discord, id")
    }
)
public class MessageEntity {

    /** Surrogate primary key, generated by the database (IDENTITY). */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** Message identifier on the Discord side, stored as text (max 32 chars). Required. */
    @Column(name = "discord_message_id", nullable = false, length = 32)
    private String discordMessageId;

    // --- Associations -------------------------------------------------------------

    /** Author of the message; required. */
    @ManyToOne(fetch = FetchType.LAZY, optional = false)
    @JoinColumn(name = "author_id", nullable = false)
    private UserEntity author;

    /** Guild the message was sent in; the join column is nullable. */
    @ManyToOne(fetch = FetchType.LAZY)
    @JoinColumn(name = "guild_id")
    private GuildEntity guild;

    /** Guild membership row of the author; the join column is nullable. */
    @ManyToOne(fetch = FetchType.LAZY)
    @JoinColumn(name = "guild_member_id")
    private GuildMemberEntity guildMember;

    /** Channel the message was sent in; required. */
    @ManyToOne(fetch = FetchType.LAZY, optional = false)
    @JoinColumn(name = "channel_id", nullable = false)
    private ChannelEntity channel;

    // --- Message metadata ---------------------------------------------------------

    /** Discord id of the referenced message, if any. Plain text, not a foreign key. */
    @Column(name = "referenced_discord_message_id", length = 32)
    private String referencedDiscordMessageId;

    /** Message type stored as free text (max 64 chars). Required. */
    @Column(name = "message_type", nullable = false, length = 64)
    private String messageType;

    // Three renderings of the message body, each in an unbounded TEXT column.
    @Column(name = "content_raw", columnDefinition = "TEXT")
    private String contentRaw;

    @Column(name = "content_display", columnDefinition = "TEXT")
    private String contentDisplay;

    @Column(name = "content_stripped", columnDefinition = "TEXT")
    private String contentStripped;

    @Column(name = "jump_url", length = 1024)
    private String jumpUrl;

    @Column(name = "nonce", length = 64)
    private String nonce;

    @Column(name = "application_id", length = 32)
    private String applicationId;

    /** Message flags stored as text (max 512 chars); the encoding is decided by the writer. */
    @Column(name = "message_flags", length = 512)
    private String messageFlags;

    // --- Counters (NOT NULL, default 0) -------------------------------------------

    @Column(name = "attachment_count", nullable = false)
    private int attachmentCount;

    @Column(name = "embed_count", nullable = false)
    private int embedCount;

    @Column(name = "reaction_count", nullable = false)
    private int reactionCount;

    @Column(name = "sticker_count", nullable = false)
    private int stickerCount;

    @Column(name = "component_count", nullable = false)
    private int componentCount;

    @Column(name = "mentioned_user_count", nullable = false)
    private int mentionedUserCount;

    @Column(name = "mentioned_channel_count", nullable = false)
    private int mentionedChannelCount;

    @Column(name = "mentioned_role_count", nullable = false)
    private int mentionedRoleCount;

    @Column(name = "mentioned_member_count", nullable = false)
    private int mentionedMemberCount;

    // --- Flags (NOT NULL, default false) ------------------------------------------

    @Column(name = "tts_message", nullable = false)
    private boolean ttsMessage;

    @Column(name = "pinned_message", nullable = false)
    private boolean pinnedMessage;

    @Column(name = "edited_message", nullable = false)
    private boolean editedMessage;

    @Column(name = "webhook_message", nullable = false)
    private boolean webhookMessage;

    @Column(name = "from_guild", nullable = false)
    private boolean fromGuild;

    @Column(name = "suppressed_embeds", nullable = false)
    private boolean suppressedEmbeds;

    @Column(name = "ephemeral_message", nullable = false)
    private boolean ephemeralMessage;

    // --- Timestamps ---------------------------------------------------------------

    /** Creation timestamp as reported by Discord. Required. */
    @Column(name = "created_at_discord", nullable = false)
    private OffsetDateTime createdAtDiscord;

    /** Edit timestamp as reported by Discord; nullable. */
    @Column(name = "edited_at_discord")
    private OffsetDateTime editedAtDiscord;

    /** Timestamp at which the row was stored locally. Required; must be set by the caller. */
    @Column(name = "persisted_at", nullable = false)
    private OffsetDateTime persistedAt;

    /** Last row update timestamp. Required; must be set by the caller. */
    @Column(name = "updated_at", nullable = false)
    private OffsetDateTime updatedAt;
}
6. UserEntity
package top.lmix.lmixbot.feature.discord.entity;
import jakarta.persistence.*;
import lombok.*;
import java.time.OffsetDateTime;
/**
 * JPA entity mapped to the {@code discord_users} table, one row per Discord user id.
 *
 * <p>Lombok generates the accessors, a builder, and the no-args/all-args constructors.
 * The all-args constructor follows field declaration order, so reordering fields changes
 * its signature. Nothing in this class populates {@code createdAt}/{@code updatedAt};
 * both are NOT NULL and must be set by the caller.
 */
@Getter
@Setter
@Entity
@Builder(toBuilder = true)
@NoArgsConstructor
@AllArgsConstructor
@Table(
    name = "discord_users",
    indexes = {
        // Uniqueness of discord_user_id is enforced by this named index alone.
        // The column is deliberately not flagged unique: Column.unique would add a
        // second, auto-named unique constraint on top of this index.
        @Index(name = "idx_discord_users_discord_id", columnList = "discord_user_id", unique = true)
    }
)
public class UserEntity {

    /** Surrogate primary key, generated by the database (IDENTITY). */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** User identifier on the Discord side, stored as text (max 32 chars). Required. */
    @Column(name = "discord_user_id", nullable = false, length = 32)
    private String discordUserId;

    /** Username. Required. */
    @Column(name = "username", nullable = false, length = 255)
    private String username;

    /** Global display name; nullable. */
    @Column(name = "global_name", length = 255)
    private String globalName;

    /** User tag; nullable. */
    @Column(name = "user_tag", length = 255)
    private String userTag;

    /** Discriminator stored as text (max 16 chars); nullable. */
    @Column(name = "discriminator", length = 16)
    private String discriminator;

    /** Avatar URL; nullable. */
    @Column(name = "avatar_url", length = 1024)
    private String avatarUrl;

    /** Bot flag; primitive, so it defaults to false when built without a value. */
    @Column(name = "bot_user", nullable = false)
    private boolean botUser;

    /** System-user flag. Note the column is named {@code discord_system_user}, not {@code system_user}. */
    @Column(name = "discord_system_user", nullable = false)
    private boolean systemUser;

    /** Row creation timestamp. Required; must be set by the caller. */
    @Column(name = "created_at", nullable = false)
    private OffsetDateTime createdAt;

    /** Last row update timestamp. Required; must be set by the caller. */
    @Column(name = "updated_at", nullable = false)
    private OffsetDateTime updatedAt;
}
Implementação do Serviço de Backfill
Lógica Principal
O serviço de backfill coleta tanto as mensagens antigas (retroativamente) quanto as mensagens novas, a partir de um ponto de referência.
package top.lmix.lmixbot.feature.discord.backfill;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import net.dv8tion.jda.api.JDA;
import net.dv8tion.jda.api.Permission;
import net.dv8tion.jda.api.entities.*;
import net.dv8tion.jda.api.entities.channel.attribute.*;
import net.dv8tion.jda.api.entities.channel.concrete.ThreadChannel;
import net.dv8tion.jda.api.entities.channel.middleman.*;
import net.dv8tion.jda.api.requests.GatewayIntent;
import org.springframework.stereotype.Service;
import top.lmix.lmixbot.feature.discord.entity.DiscordMessageBackfillStateEntity;
import top.lmix.lmixbot.feature.discord.repository.DiscordMessageBackfillStateRepository;
import top.lmix.lmixbot.feature.discord.service.DiscordMessagePersistenceService;
import java.time.OffsetDateTime;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
/**
 * Spring service declared as the home of the message backfill loop.
 *
 * <p>This listing is only a skeleton: the class body contains no fields or methods.
 * Lombok's {@code @RequiredArgsConstructor} generates a constructor for final fields
 * and {@code @Slf4j} generates the {@code log} field.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class DiscordMessageBackfillService {
// ... Full implementation of the fetch-and-persist loop ...
// (Omitted for brevity in this mind map, but it must include the fetchMessagesBefore and fetchMessagesAfter logic)
}DiscordMessageBackfillStatus (Enum)
package top.lmix.lmixbot.feature.discord.backfill;
/**
 * Status values stored in the {@code status} column of
 * {@code DiscordMessageBackfillStateEntity}.
 *
 * <p>The entity persists this enum by constant name ({@code EnumType.STRING}), so
 * renaming a constant invalidates rows that already store the old name.
 *
 * <p>NOTE(review): the per-constant meanings below are inferred from the names only;
 * the code that sets them is not part of this listing. Confirm against the service.
 */
public enum DiscordMessageBackfillStatus {
// presumably: not started yet
PENDING,
// presumably: a backfill pass is in progress
RUNNING,
// presumably: the last attempt failed and will be retried
FAILED_RETRY,
// presumably: nothing left to fetch
COMPLETED,
// presumably: skipped because the channel is not accessible
SKIPPED_NO_ACCESS
}Dicas de Performance e Rate Limit
- Rate Limits: A API do Discord permite solicitar até 100 mensagens por requisição. Aguarde cerca de 1,3 segundo entre as requisições para evitar o erro HTTP 429.
- Paralelismo: Se for realizar o backup em vários servidores, utilize instâncias com tokens diferentes para cada uma, otimizando o tempo total de coleta.
- Persistência: O PostgreSQL é ideal para este cenário devido à sua performance superior em workloads complexos e concorrência (MVCC).
Nos meus testes consegui por volta de 165 MB de dados em um servidor de amigos no qual eu estava autorizado a testar a lameragem.
