一、tcp_transmit_skb
/*
 * tcp_transmit_skb - build the TCP header for one segment and pass it to
 * the address-family transmit hook.
 *
 * @sk:       socket the segment belongs to
 * @skb:      buffer holding the segment payload; must be non-NULL and
 *            have a non-zero tcp_skb_pcount() (enforced by BUG_ON)
 * @clone_it: when non-zero, a clone (or a copy, if @skb is already
 *            cloned) is transmitted instead of @skb itself
 * @gfp_mask: allocation flags used for the clone/copy
 *
 * Returns -ENOBUFS when the clone/copy cannot be allocated. Otherwise
 * returns the result of queue_xmit() when it is <= 0; for a positive
 * result tcp_enter_cwr() is called and net_xmit_eval(err) is returned.
 */
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
			    gfp_t gfp_mask)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet;
	struct tcp_sock *tp;
	struct tcp_skb_cb *tcb;
	struct tcp_out_options opts;
	unsigned tcp_options_size, tcp_header_size;
	/* Not initialised here; presumably filled in through &md5 by the
	 * option helpers below -- TODO confirm against their definitions.
	 */
	struct tcp_md5sig_key *md5;
	struct tcphdr *th;
	int err;

	BUG_ON(!skb || !tcp_skb_pcount(skb));

	/* If congestion control is doing timestamping, we must
	 * take such a timestamp before we potentially clone/copy.
	 *
	 * Not every congestion-control algorithm needs time sampling, so
	 * the TCP_CONG_RTT_STAMP flag of the current algorithm is checked
	 * before the timestamp is taken.
	 */
	if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
		__net_timestamp(skb);

	/* The clone_it argument decides whether the segment is duplicated
	 * before it is sent.
	 */
	if (likely(clone_it)) {
		/* The skb is already cloned, so its data has to be copied
		 * into a newly allocated skb instead.
		 */
		if (unlikely(skb_cloned(skb)))
			skb = pskb_copy(skb, gfp_mask);
		else
			/* Otherwise a plain clone is enough. */
			skb = skb_clone(skb, gfp_mask);
		if (unlikely(!skb))
			return -ENOBUFS;
	}

	/* Fetch the INET-level and TCP-level views of the socket and the
	 * TCP private control block stored in the skb.
	 */
	inet = inet_sk(sk);
	tp = tcp_sk(sk);
	tcb = TCP_SKB_CB(skb);
	memset(&opts, 0, sizeof(opts));

	/* Work out the option length, which determines the header length.
	 * SYN segments are handled separately because some options may
	 * only appear in a SYN.
	 */
	if (unlikely(tcb->flags & TCPHDR_SYN))
		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
	else
		tcp_options_size = tcp_established_options(sk, skb, &opts,
							   &md5);
	/* Total TCP header length = options + fixed struct tcphdr. */
	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);

	/* Nothing is currently in flight: tell congestion control that
	 * transmission is starting. (The original commentary says this
	 * also starts RTT tracking for the connection -- not verifiable
	 * from this function alone.)
	 */
	if (tcp_packets_in_flight(tp) == 0)
		tcp_ca_event(sk, CA_EVENT_TX_START);

	/* Make room for the TCP header in front of the payload:
	 * skb_push() extends the data area by tcp_header_size bytes
	 * (moving the data pointer toward the buffer head), and that
	 * position is then recorded as the transport header.
	 */
	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);
	/* Record sk as the owner of this skb; from the skb's point of
	 * view it is not yet attached to any socket.
	 */
	skb_set_owner_w(skb, sk);

	/* Build TCP header and checksum it. */
	/* Fill in source port, destination port, sequence number,
	 * acknowledgment number and the flag bits.
	 */
	th = tcp_hdr(skb);
	th->source = inet->inet_sport;
	th->dest = inet->inet_dport;
	th->seq = htonl(tcb->seq);
	th->ack_seq = htonl(tp->rcv_nxt);
	/* The 16-bit word at index 6 of the header (bytes 12-13) is
	 * written in one store: header length in 32-bit words in the top
	 * four bits, OR-ed with the flags from the control block.
	 */
	*(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
				      tcb->flags);

	/* The advertised receive window is set in one of two ways. */
	if (unlikely(tcb->flags & TCPHDR_SYN)) {
		/* RFC1323: The window in SYN & SYN/ACK segments
		 * is never scaled.
		 */
		/* SYN segment: advertise rcv_wnd, capped at 65535. */
		th->window = htons(min(tp->rcv_wnd, 65535U));
	} else {
		/* Any other segment: let tcp_select_window() compute the
		 * current window.
		 */
		th->window = htons(tcp_select_window(sk));
	}
	/* Clear the checksum and urgent pointer fields before they are
	 * (possibly) filled in below.
	 */
	th->check = 0;
	th->urg_ptr = 0;

	/* The urg_mode check is necessary during a below snd_una win probe */
	if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
		if (before(tp->snd_up, tcb->seq + 0x10000)) {
			/* Urgent point is within 16-bit reach of seq. */
			th->urg_ptr = htons(tp->snd_up - tcb->seq);
			th->urg = 1;
		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
			/* Out of reach: use the largest offset that fits. */
			th->urg_ptr = htons(0xFFFF);
			th->urg = 1;
		}
	}

	/* Options are written immediately after the fixed header. */
	tcp_options_write((__be32 *)(th + 1), tp, &opts);
	if (likely((tcb->flags & TCPHDR_SYN) == 0))
		TCP_ECN_send(sk, skb, tcp_header_size);

#ifdef CONFIG_TCP_MD5SIG
	/* Calculate the MD5 hash, as we have all we need now */
	if (md5) {
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		tp->af_specific->calc_md5_hash(opts.hash_location,
					       md5, sk, NULL, skb);
	}
#endif

	/* Address-family specific checksum hook. */
	icsk->icsk_af_ops->send_check(sk, skb);

	if (likely(tcb->flags & TCPHDR_ACK))
		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));

	/* skb->len larger than the header means the segment carries data. */
	if (skb->len != tcp_header_size)
		tcp_event_data_sent(tp, skb, sk);

	/* Count the segment in OUTSEGS when it ends beyond snd_nxt or
	 * occupies no sequence space (seq == end_seq).
	 */
	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
			      tcp_skb_pcount(skb));

	/* Hand the segment to the queue_xmit hook, i.e. down to the
	 * network layer. (Per the original commentary, for TCP this hook
	 * is implemented by ip_queue_xmit() -- confirm against the
	 * icsk_af_ops table in use.)
	 */
	err = icsk->icsk_af_ops->queue_xmit(skb);
	if (likely(err <= 0))
		return err;

	/* Positive return from the lower layer: enter CWR and map the
	 * code through net_xmit_eval().
	 */
	tcp_enter_cwr(sk, 1);

	return net_xmit_eval(err);
}
from: